{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "b7cf5c89964dcaee",
   "metadata": {
    "collapsed": false
   },
   "source": [
    "# 特征工程\n",
    "\n",
    "数据集来源于 Data Hackathon 3.x。这里的特征处理只提供最基本的参考，你可以自行尝试更多的特征工程工作，例如参考 GitHub 上的 Feature engineering 和 Kaggle Titanic 案例。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dbea37e0ed0024ec",
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:07:54.349477Z",
     "start_time": "2025-07-16T06:07:53.106883Z"
    }
   },
   "outputs": [],
   "source": [
    "\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import xgboost as xgb\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "2e940aba9dda2389",
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:09:30.964815Z",
     "start_time": "2025-07-16T06:09:30.551748Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(87020, 26) (37717, 24)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_29220\\2042474453.py:1: DtypeWarning: Columns (12,18) have mixed types. Specify dtype option on import or set low_memory=False.\n",
      "  train = pd.read_csv('data/Train.csv',encoding='utf-8')\n"
     ]
    }
   ],
   "source": [
    "# Load the training and test splits; both files are read as UTF-8.\n",
    "# NOTE(review): the saved run emitted a DtypeWarning (mixed types in columns 12 and 18)\n",
    "# for Train.csv - consider an explicit dtype= mapping once the intended types are confirmed.\n",
    "read_options = {'encoding': 'utf-8'}\n",
    "train = pd.read_csv('data/Train.csv', **read_options)\n",
    "test = pd.read_csv('data/Test.csv', **read_options)\n",
    "print(train.shape, test.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "e6b116ac6d909976",
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:09:58.120396Z",
     "start_time": "2025-07-16T06:09:58.073389Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 87020 entries, 0 to 87019\n",
      "Data columns (total 26 columns):\n",
      " #   Column                 Non-Null Count  Dtype  \n",
      "---  ------                 --------------  -----  \n",
      " 0   ID                     87020 non-null  object \n",
      " 1   Gender                 87020 non-null  object \n",
      " 2   City                   86017 non-null  object \n",
      " 3   Monthly_Income         87020 non-null  int64  \n",
      " 4   DOB                    87020 non-null  object \n",
      " 5   Lead_Creation_Date     87020 non-null  object \n",
      " 6   Loan_Amount_Applied    86949 non-null  float64\n",
      " 7   Loan_Tenure_Applied    86949 non-null  float64\n",
      " 8   Existing_EMI           86949 non-null  float64\n",
      " 9   Employer_Name          86949 non-null  object \n",
      " 10  Salary_Account         75256 non-null  object \n",
      " 11  Mobile_Verified        87020 non-null  object \n",
      " 12  Var5                   87020 non-null  object \n",
      " 13  Var1                   87019 non-null  object \n",
      " 14  Loan_Amount_Submitted  52407 non-null  float64\n",
      " 15  Loan_Tenure_Submitted  52407 non-null  float64\n",
      " 16  Interest_Rate          27726 non-null  float64\n",
      " 17  Processing_Fee         27420 non-null  float64\n",
      " 18  EMI_Loan_Submitted     27727 non-null  object \n",
      " 19  Filled_Form            87020 non-null  object \n",
      " 20  Device_Type            87020 non-null  object \n",
      " 21  Var2                   87020 non-null  object \n",
      " 22  Source                 87020 non-null  object \n",
      " 23  Var4                   87020 non-null  int64  \n",
      " 24  LoggedIn               87020 non-null  int64  \n",
      " 25  Disbursed              87019 non-null  float64\n",
      "dtypes: float64(8), int64(3), object(15)\n",
      "memory usage: 17.3+ MB\n"
     ]
    }
   ],
   "source": [
    "# Per-column dtypes and non-null counts of the training split.\n",
    "train.info()"
   ]
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "            ID  Gender       City  Monthly_Income        DOB  \\\n0  ID000002C20  Female      Delhi           20000  23-May-78   \n1  ID000004E40    Male     Mumbai           35000  07-Oct-85   \n2  ID000007H20    Male  Panchkula           22500  10-Oct-81   \n3  ID000008I30    Male    Saharsa           35000  30-Nov-87   \n4  ID000009J40    Male  Bengaluru          100000  17-Feb-84   \n\n  Lead_Creation_Date  Loan_Amount_Applied  Loan_Tenure_Applied  Existing_EMI  \\\n0          15-May-15             300000.0                  5.0           0.0   \n1          04-May-15             200000.0                  2.0           0.0   \n2          19-May-15             600000.0                  4.0           0.0   \n3          09-May-15            1000000.0                  5.0           0.0   \n4          20-May-15             500000.0                  2.0       25000.0   \n\n                         Employer_Name  ... Interest_Rate Processing_Fee  \\\n0                              CYBOSOL  ...           NaN            NaN   \n1  TATA CONSULTANCY SERVICES LTD (TCS)  ...         13.25            NaN   \n2              ALCHEMIST HOSPITALS LTD  ...           NaN            NaN   \n3                     BIHAR GOVERNMENT  ...           NaN            NaN   \n4                 GLOBAL EDGE SOFTWARE  ...           NaN            NaN   \n\n  EMI_Loan_Submitted Filled_Form  Device_Type  Var2  Source  Var4 LoggedIn  \\\n0                NaN           N  Web-browser     G    S122     1        0   \n1             6762.9           N  Web-browser     G    S122     3        0   \n2                NaN           N  Web-browser     B    S143     1        0   \n3                NaN           N  Web-browser     B    S143     3        0   \n4                NaN           N  Web-browser     B    S134     3        1   \n\n  Disbursed  \n0       0.0  \n1       0.0  \n2       0.0  \n3       0.0  \n4       0.0  \n\n[5 rows x 26 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>ID</th>\n      <th>Gender</th>\n      <th>City</th>\n      <th>Monthly_Income</th>\n      <th>DOB</th>\n      <th>Lead_Creation_Date</th>\n      <th>Loan_Amount_Applied</th>\n      <th>Loan_Tenure_Applied</th>\n      <th>Existing_EMI</th>\n      <th>Employer_Name</th>\n      <th>...</th>\n      <th>Interest_Rate</th>\n      <th>Processing_Fee</th>\n      <th>EMI_Loan_Submitted</th>\n      <th>Filled_Form</th>\n      <th>Device_Type</th>\n      <th>Var2</th>\n      <th>Source</th>\n      <th>Var4</th>\n      <th>LoggedIn</th>\n      <th>Disbursed</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>ID000002C20</td>\n      <td>Female</td>\n      <td>Delhi</td>\n      <td>20000</td>\n      <td>23-May-78</td>\n      <td>15-May-15</td>\n      <td>300000.0</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>CYBOSOL</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>Web-browser</td>\n      <td>G</td>\n      <td>S122</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>ID000004E40</td>\n      <td>Male</td>\n      <td>Mumbai</td>\n      <td>35000</td>\n      <td>07-Oct-85</td>\n      <td>04-May-15</td>\n      <td>200000.0</td>\n      <td>2.0</td>\n      <td>0.0</td>\n      <td>TATA CONSULTANCY SERVICES LTD (TCS)</td>\n      <td>...</td>\n      <td>13.25</td>\n      <td>NaN</td>\n      <td>6762.9</td>\n      <td>N</td>\n      <td>Web-browser</td>\n      <td>G</td>\n      <td>S122</td>\n      <td>3</td>\n      <td>0</td>\n      <td>0.0</td>\n    
</tr>\n    <tr>\n      <th>2</th>\n      <td>ID000007H20</td>\n      <td>Male</td>\n      <td>Panchkula</td>\n      <td>22500</td>\n      <td>10-Oct-81</td>\n      <td>19-May-15</td>\n      <td>600000.0</td>\n      <td>4.0</td>\n      <td>0.0</td>\n      <td>ALCHEMIST HOSPITALS LTD</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>Web-browser</td>\n      <td>B</td>\n      <td>S143</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>ID000008I30</td>\n      <td>Male</td>\n      <td>Saharsa</td>\n      <td>35000</td>\n      <td>30-Nov-87</td>\n      <td>09-May-15</td>\n      <td>1000000.0</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>BIHAR GOVERNMENT</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>Web-browser</td>\n      <td>B</td>\n      <td>S143</td>\n      <td>3</td>\n      <td>0</td>\n      <td>0.0</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>ID000009J40</td>\n      <td>Male</td>\n      <td>Bengaluru</td>\n      <td>100000</td>\n      <td>17-Feb-84</td>\n      <td>20-May-15</td>\n      <td>500000.0</td>\n      <td>2.0</td>\n      <td>25000.0</td>\n      <td>GLOBAL EDGE SOFTWARE</td>\n      <td>...</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>NaN</td>\n      <td>N</td>\n      <td>Web-browser</td>\n      <td>B</td>\n      <td>S134</td>\n      <td>3</td>\n      <td>1</td>\n      <td>0.0</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 26 columns</p>\n</div>"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the first five training rows (five is the default for head()).\n",
    "train.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:10:34.905409Z",
     "start_time": "2025-07-16T06:10:34.886941Z"
    }
   },
   "id": "d0bf4b36c6409b07",
   "execution_count": 6
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "(124737, 27)"
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tag every row with the split it came from, then stack both splits into one frame.\n",
    "# Note: this lowercase 'source' column is separate from the dataset's own 'Source' column.\n",
    "for split_name, split_frame in (('train', train), ('test', test)):\n",
    "    split_frame['source'] = split_name\n",
    "data = pd.concat([train, test], ignore_index=True)\n",
    "data.shape"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:12:05.685812Z",
     "start_time": "2025-07-16T06:12:05.661011Z"
    }
   },
   "id": "5473b4b519392b50",
   "execution_count": 7
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 异常数据处理"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "c8ae53fed834de9b"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "ID                           0\nGender                       0\nCity                      1401\nMonthly_Income               0\nDOB                          0\nLead_Creation_Date           0\nLoan_Amount_Applied        111\nLoan_Tenure_Applied        111\nExisting_EMI               111\nEmployer_Name              113\nSalary_Account           16801\nMobile_Verified              0\nVar5                         0\nVar1                         1\nLoan_Amount_Submitted    49535\nLoan_Tenure_Submitted    49535\nInterest_Rate            84901\nProcessing_Fee           85346\nEMI_Loan_Submitted       84900\nFilled_Form                  0\nDevice_Type                  0\nVar2                         0\nSource                       0\nVar4                         0\nLoggedIn                 37717\nDisbursed                37718\nsource                       0\ndtype: int64"
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Missing-value count per column, computed with the vectorized isnull().sum()\n",
    "# instead of a per-column Python-level sum().\n",
    "data.isnull().sum()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:13:02.294054Z",
     "start_time": "2025-07-16T06:13:02.033201Z"
    }
   },
   "id": "8d51e5752556e258",
   "execution_count": 8
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 离散值的分布情况"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "fe23ef5c2b35a0b9"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Gender\n",
      "Gender\n",
      "Male      71398\n",
      "Female    53339\n",
      "Name: count, dtype: int64\n",
      "\n",
      "\n",
      "Mobile_Verified\n",
      "Mobile_Verified\n",
      "Y    80928\n",
      "N    43808\n",
      "0        1\n",
      "Name: count, dtype: int64\n",
      "\n",
      "\n",
      "Var1\n",
      "Var1\n",
      "HBXX    84900\n",
      "HBXC    12952\n",
      "HBXB     6502\n",
      "HAXA     4214\n",
      "HBXA     3042\n",
      "HAXB     2879\n",
      "HBXD     2818\n",
      "HAXC     2171\n",
      "HBXH     1387\n",
      "HCXF      990\n",
      "HAYT      710\n",
      "HAVC      570\n",
      "HAXM      386\n",
      "HCXD      348\n",
      "HCYS      318\n",
      "HVYS      252\n",
      "HAZD      161\n",
      "HCXG      114\n",
      "HAXF       22\n",
      "Name: count, dtype: int64\n",
      "\n",
      "\n",
      "Filled_Form\n",
      "Filled_Form\n",
      "N         96739\n",
      "Y         27997\n",
      "Mobile        1\n",
      "Name: count, dtype: int64\n",
      "\n",
      "\n",
      "Device_Type\n",
      "Device_Type\n",
      "Web-browser    92105\n",
      "Mobile         32631\n",
      "G                  1\n",
      "Name: count, dtype: int64\n",
      "\n",
      "\n",
      "Var2\n",
      "Var2\n",
      "B       53481\n",
      "G       47337\n",
      "C       20366\n",
      "E        1855\n",
      "D         918\n",
      "F         770\n",
      "A           9\n",
      "S122        1\n",
      "Name: count, dtype: int64\n",
      "\n",
      "\n",
      "Source\n",
      "Source\n",
      "S122    55248\n",
      "S133    42900\n",
      "S159     7999\n",
      "S143     6140\n",
      "S127     2804\n",
      "S137     2450\n",
      "S134     1900\n",
      "S161     1109\n",
      "S151     1018\n",
      "S157      929\n",
      "S153      705\n",
      "S144      447\n",
      "S156      432\n",
      "S158      294\n",
      "S123      112\n",
      "S141       83\n",
      "S162       60\n",
      "S124       43\n",
      "S150       19\n",
      "S160       11\n",
      "S136        5\n",
      "S155        5\n",
      "S138        5\n",
      "S129        4\n",
      "S139        4\n",
      "S135        2\n",
      "S130        1\n",
      "S125        1\n",
      "S154        1\n",
      "S140        1\n",
      "1           1\n",
      "S142        1\n",
      "S126        1\n",
      "S131        1\n",
      "S132        1\n",
      "Name: count, dtype: int64\n"
     ]
    }
   ],
   "source": [
    "# Print the value frequencies of each listed column.\n",
    "# NOTE(review): the saved output contains single stray values (Mobile_Verified '0',\n",
    "# Filled_Form 'Mobile', Device_Type 'G', Var2 'S122', Source '1') - this looks like one\n",
    "# misaligned row; confirm and clean it before encoding these columns.\n",
    "var = ['Gender', 'Mobile_Verified', 'Var1', 'Filled_Form', 'Device_Type', 'Var2', 'Source']\n",
    "for v in var:\n",
    "    # Same text as three separate prints: column name, its counts, then two blank lines.\n",
    "    print(v, data[v].value_counts(), '\\n', sep='\\n')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:19:38.237494Z",
     "start_time": "2025-07-16T06:19:38.196817Z"
    }
   },
   "id": "7f62e290e12c367c",
   "execution_count": 16
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 特征工程"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "62d9fcf4372340f1"
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Remove the City column. Rebinding `data` (rather than inplace=True) together with\n",
    "# errors='ignore' keeps this cell safe to re-run after City is already gone.\n",
    "data = data.drop(columns='City', errors='ignore')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:21:16.018363Z",
     "start_time": "2025-07-16T06:21:15.987492Z"
    }
   },
   "id": "a8b94097a88f54e7",
   "execution_count": 18
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "0    37\n1    30\n2    34\n3    28\n4    31\n5    33\n6    28\n7    40\n8    43\n9    26\nName: Age, dtype: int64"
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Derive age from the date of birth. DOB strings end in a two-digit year (e.g. '23-May-78'),\n",
    "# which is read as 19YY; the reference year 2015 matches the original constant 115 (= 2015 - 1900).\n",
    "# NOTE(review): a birth year of 2000 or later would be misread as 19YY - confirm none occur.\n",
    "REFERENCE_YEAR = 2015\n",
    "BIRTH_CENTURY = 1900\n",
    "# Vectorized slice of the last two characters; int64 keeps the dtype the same on every platform.\n",
    "birth_year = BIRTH_CENTURY + data['DOB'].str[-2:].astype('int64')\n",
    "data['Age'] = REFERENCE_YEAR - birth_year\n",
    "data['Age'].head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:22:33.761617Z",
     "start_time": "2025-07-16T06:22:33.693612Z"
    }
   },
   "id": "6b0d0ee08517122a",
   "execution_count": 19
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "<Axes: >"
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAGdCAYAAAD+JxxnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAABEMUlEQVR4nO3df1hUdd4//ucAMxMgTPxIxhFMTDMLcov2649WEQ0wfpjrtm1K5Fa3a7ehGZhl9w9/bCtumXnfuVZ77W7btiV7b6K76yILloEsA7oYd5Bra61CEogpzgjozDC8Pn90c74esRhrZGTO83FdXDnv85ozr8N1DefZe97njE5EBEREREQaFODrBoiIiIh8hUGIiIiINItBiIiIiDSLQYiIiIg0i0GIiIiINItBiIiIiDSLQYiIiIg0i0GIiIiINCvI1w1c7Xp7e/HZZ58hLCwMOp3O1+0QERGRB0QEZ8+ehcViQUDAl8/7MAgN4LPPPkNcXJyv2yAiIqKv4dNPP0VsbOyXbmcQGkBYWBiAL36R4eHhPu6GiLzJ5XKhrKwMaWlp0Ov1vm6HiLzIbrcjLi5OOY9/GQahAfR9HBYeHs4gRORnXC4XQkJCEB4eziBE5KcGWtbCxdJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkSkSW63GxUVFaisrERFRQXcbrevWyIiH2AQIiLNKS4uxtixY5GamopNmzYhNTUVY8eORXFxsa9bI6JBxiBERJpSXFyMe++9F4mJidi3bx+2bduGffv2ITExEffeey/DEJHG6EREfN3E1cxut8NkMsFms/G7xoiGOLfbjbFjxyIxMRE7d+6E2+1GSUkJMjIyEBgYiLlz56KxsRFHjhxBYGCgr9slom/A0/M3Z4SISDP27duHY8eO4ZlnnkFAgPrPX0BAAFatWoWjR49i3759PuqQiAYbgxARaUZraysAICEh4ZLb+8b76ojI/zEIEZFmjBgxAgDQ2Nh4ye194311ROT/GISISDOmTZuG0aNHY/369ejt7VVt6+3tRWFhIeLj4zFt2jQfdUhEg41BiIg0IzAwEC+88AJ27dqFuXPnoqamBufOnUNNTQ3mzp2LXbt2YePGjVwoTaQhQb5ugIhoMM2bNw9vv/02CgoKMH36dGU8Pj4eb7/9NubNm+fD7ohosPHy+QHw8nki/+R2u7F3717s3r0bd999N1JSUjgTRORHPD1/c0aIiDQpMDAQycnJ6OrqQnJyMkMQkUZd9hqhyspKZGdnw2KxQKfTYefOnV9au3jxYuh0OmzevFk17nA4sHTpUkRHRyM0NBRz5szB8ePHVTUdHR3Izc2FyWSCyWRCbm4uzpw5o6ppbm5GdnY2QkNDER0djWXLlsHpdKpqGhoakJycjODgYIwcORLr1q0DJ8GIiIgI+BpBqKurCxMnTsSWLVu+sm7nzp2ora2FxWLpt2358uXYsWMHioqKUFVVhc7OTmRlZam+9HDBggWor69HaWkpSktLUV9fj9zcXGW72+1GZmYmurq6UFVVhaKiImzfvh0FBQVKjd1uR2pqKiwWCw4cOICXXnoJGzduxKZNmy73sImIiMgfyTcAQHbs2NFv/Pjx4zJy5EhpbGyU66+/Xl588UVl25kzZ0Sv10tRUZEy1tLSIgEBAVJaWioiIocOHRIAUlNTo9RYrVYBIIcPHxYRkZKSEgkICJCWlhalZtu2bWI0GsVms4mIyNatW8VkMsn58+eVmsLCQrFYLNLb2+vRMdpsNgGg7JOI/IfT6ZSdO3eK0+n0dStE5GWenr+9vkaot7cXubm5ePLJJ3HLLbf0215XVweXy4W0tDRlzGKxICEhAdXV1UhPT4fVaoXJZMKkSZOUmsmTJ8NkMqG6uhrjx4+H1WpFQkKCasYpPT0dDocDdXV1SElJgdVqRXJyMoxGo6pm1apV
OHbsGOLj4/v153A44HA4lMd2ux0A4HK54HK5vtkvh4iuKn3vab63ifyPp+9rrwehn/70pwgKCsKyZcsuub2trQ0GgwERERGq8ZiYGLS1tSk1w4cP7/fc4cOHq2piYmJU2yMiImAwGFQ1o0eP7vc6fdsuFYQKCwuxdu3afuNlZWUICQm55DER0dBWXl7u6xaIyMu6u7s9qvNqEKqrq8N//dd/4eDBg9DpdJf1XBFRPedSz/dGjfzfQukv62/VqlXIz89XHtvtdsTFxSEtLY2XzxP5GZfLhfLycqSmpkKv1/u6HSLyor5PdAbi1SC0b98+tLe3Y9SoUcqY2+1GQUEBNm/ejGPHjsFsNsPpdKKjo0M1K9Te3o6pU6cCAMxmM06cONFv/ydPnlRmdMxmM2pra1XbOzo64HK5VDV9s0MXvg6AfrNJfYxGo+qjtD56vZ5/KIn8FN/fRP7H0/e0V79iIzc3Fx988AHq6+uVH4vFgieffBJ/+ctfAABJSUnQ6/WqqejW1lY0NjYqQWjKlCmw2WzYv3+/UlNbWwubzaaqaWxsVH1LdFlZGYxGI5KSkpSayspK1SX1ZWVlsFgs/T4yIyIiIu257Bmhzs5OfPzxx8rjo0ePor6+HpGRkRg1ahSioqJU9Xq9HmazGePHjwcAmEwmPPLIIygoKEBUVBQiIyOxYsUKJCYm4q677gIATJgwAbNnz8aiRYvw6quvAgB+9KMfISsrS9lPWloabr75ZuTm5uL555/H6dOnsWLFCixatEj5CGvBggVYu3YtfvjDH+KZZ57BkSNHsH79evznf/7nZX90R0RERH7oci9H27t3rwDo97Nw4cJL1l98+byIyLlz5yQvL08iIyMlODhYsrKypLm5WVVz6tQpycnJkbCwMAkLC5OcnBzp6OhQ1TQ1NUlmZqYEBwdLZGSk5OXlqS6VFxH54IMPZNq0aWI0GsVsNsuaNWs8vnRehJfPE/kzXj5P5L88PX/zu8YGwO8aI/JPTqcTL730Et59913MnDkTS5cuhcFg8HVbROQlnp6/vbpGiIhoKFi5ciVCQ0OxYsUKlJSUYMWKFQgNDcXKlSt93RoRDTJ+6SoRacrKlSvx/PPPIyYmBmvXroXRaITD4cDq1avx/PPPAwCee+45H3dJRIOFH40NgB+NEfkPp9OJ0NBQREVF4fjx4xARlJSUICMjAzqdDrGxsTh16hS6urr4MRnREMePxoiILrJ161b09PTg2WefRVCQekI8KCgI69atQ09PD7Zu3eqjDolosDEIEZFmfPLJJwCArKysS27vG++rIyL/xyBERJpxww03AAB27dp1ye194311ROT/uEZoAFwjROQ/uEaISDu4RoiI6CIGgwFPPPEETpw4gdjYWPziF7/A6dOn8Ytf/AKxsbE4ceIEnnjiCYYgIg3h5fNEpCl9l8a/+OKLWLJkiTIeFBSEJ598kpfOE2kMPxobAD8aI/JPvLM0kX/z9PzNGSEi0iSDwYBly5Zh7NixyMjIgF6v93VLROQDXCNEREREmsUgRERERJrFIERERESaxSBEREREmsUgRERERJrFIERERESaxSBEREREmsUgRERERJrFIERERESaxSBEREREmsUgRERERJrFIERERESaxSBEREREmsUgRERERJrFIERERESaxSBEREREmsUgRERERJrFIERERESaxSBEREREmsUgRERERJrFIERERESaxSBEREREmsUgRERERJrFIERERESaxSBEREREmnXZQaiyshLZ2dmwWCzQ6XTYuXOnss3lcuGpp55CYmIiQkNDYbFY8OCDD+Kzzz5T7cPhcGDp0qWIjo5GaGgo5syZg+PHj6tqOjo6kJubC5PJBJPJhNzcXJw5c0ZV09zcjOzsbISGhiI6OhrLli2D0+lU1TQ0NCA5ORnBwcEYOXIk1q1bBxG53MMmIiIiP3TZQairqwsTJ07Eli1b+m3r7u7GwYMH8R//8R84ePAgiouL8Y9//ANz
5sxR1S1fvhw7duxAUVERqqqq0NnZiaysLLjdbqVmwYIFqK+vR2lpKUpLS1FfX4/c3Fxlu9vtRmZmJrq6ulBVVYWioiJs374dBQUFSo3dbkdqaiosFgsOHDiAl156CRs3bsSmTZsu97CJiIjIH8k3AEB27NjxlTX79+8XANLU1CQiImfOnBG9Xi9FRUVKTUtLiwQEBEhpaamIiBw6dEgASE1NjVJjtVoFgBw+fFhEREpKSiQgIEBaWlqUmm3btonRaBSbzSYiIlu3bhWTySTnz59XagoLC8VisUhvb69Hx2iz2QSAsk8i8h9Op1N27twpTqfT160QkZd5ev4OutJBy2azQafT4dprrwUA1NXVweVyIS0tTamxWCxISEhAdXU10tPTYbVaYTKZMGnSJKVm8uTJMJlMqK6uxvjx42G1WpGQkACLxaLUpKenw+FwoK6uDikpKbBarUhOTobRaFTVrFq1CseOHUN8fHy/fh0OBxwOh/LYbrcD+OJjP5fL5bXfCxH5Xt97mu9tIv/j6fv6igah8+fP4+mnn8aCBQsQHh4OAGhra4PBYEBERISqNiYmBm1tbUrN8OHD++1v+PDhqpqYmBjV9oiICBgMBlXN6NGj+71O37ZLBaHCwkKsXbu233hZWRlCQkI8OWwiGmLKy8t93QIReVl3d7dHdVcsCLlcLtx///3o7e3F1q1bB6wXEeh0OuXxhf/2Zo3830LpSz0XAFatWoX8/Hzlsd1uR1xcHNLS0pQwR0T+weVyoby8HKmpqdDr9b5uh4i8qO8TnYFckSDkcrlw33334ejRo3j33XdVAcJsNsPpdKKjo0M1K9Te3o6pU6cqNSdOnOi335MnTyozOmazGbW1tartHR0dcLlcqpq+2aELXwdAv9mkPkajUfVRWh+9Xs8/lER+iu9vIv/j6Xva6/cR6gtBR44cwZ49exAVFaXanpSUBL1er5qKbm1tRWNjoxKEpkyZApvNhv379ys1tbW1sNlsqprGxka0trYqNWVlZTAajUhKSlJqKisrVZfUl5WVwWKx9PvIjIiIiLTnsoNQZ2cn6uvrUV9fDwA4evQo6uvr0dzcjJ6eHtx7773429/+hjfffBNutxttbW1oa2tTwojJZMIjjzyCgoICvPPOO3j//ffxwAMPIDExEXfddRcAYMKECZg9ezYWLVqEmpoa1NTUYNGiRcjKysL48eMBAGlpabj55puRm5uL999/H++88w5WrFiBRYsWKTNQCxYsgNFoxA9/+EM0NjZix44dWL9+PfLz87/0ozEiIiLSkMu9HG3v3r0CoN/PwoUL5ejRo5fcBkD27t2r7OPcuXOSl5cnkZGREhwcLFlZWdLc3Kx6nVOnTklOTo6EhYVJWFiY5OTkSEdHh6qmqalJMjMzJTg4WCIjIyUvL091qbyIyAcffCDTpk0To9EoZrNZ1qxZ4/Gl8yK8fJ7In/HyeSL/5en5WyfC2yx/FbvdDpPJBJvNxsXSRH7G5XKhpKQEGRkZXCNE5Gc8PX/zu8aIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizLjsIVVZWIjs7GxaLBTqdDjt37lRtFxGsWbMGFosFwcHBmDFjBj788ENVjcPhwNKlSxEdHY3Q0FDMmTMHx48fV9V0dHQgNzcXJpMJJpMJubm5OHPmjKqmubkZ2dnZCA0NRXR0NJYtWwan06mq
aWhoQHJyMoKDgzFy5EisW7cOInK5h01ERER+6LKDUFdXFyZOnIgtW7Zccvtzzz2HTZs2YcuWLThw4ADMZjNSU1Nx9uxZpWb58uXYsWMHioqKUFVVhc7OTmRlZcHtdis1CxYsQH19PUpLS1FaWor6+nrk5uYq291uNzIzM9HV1YWqqioUFRVh+/btKCgoUGrsdjtSU1NhsVhw4MABvPTSS9i4cSM2bdp0uYdNRERE/ki+AQCyY8cO5XFvb6+YzWbZsGGDMnb+/HkxmUzyyiuviIjImTNnRK/XS1FRkVLT0tIiAQEBUlpaKiIihw4dEgBSU1Oj1FitVgEghw8fFhGRkpISCQgIkJaWFqVm27ZtYjQaxWaziYjI1q1bxWQyyfnz55WawsJCsVgs0tvb69Ex2mw2AaDsk4j8h9PplJ07d4rT6fR1K0TkZZ6ev4O8GaqOHj2KtrY2pKWlKWNGoxHJycmorq7G4sWLUVdXB5fLpaqxWCxISEhAdXU10tPTYbVaYTKZMGnSJKVm8uTJMJlMqK6uxvjx42G1WpGQkACLxaLUpKenw+FwoK6uDikpKbBarUhOTobRaFTVrFq1CseOHUN8fHy/Y3A4HHA4HMpju90OAHC5XHC5XN75RRHRVaHvPc33NpH/8fR97dUg1NbWBgCIiYlRjcfExKCpqUmpMRgMiIiI6FfT9/y2tjYMHz683/6HDx+uqrn4dSIiImAwGFQ1o0eP7vc6fdsuFYQKCwuxdu3afuNlZWUICQm59IET0ZBWXl7u6xaIyMu6u7s9qvNqEOqj0+lUj0Wk39jFLq65VL03auT/Fkp/WT+rVq1Cfn6+8thutyMuLg5paWkIDw//ymMgoqHF5XKhvLwcqamp0Ov1vm6HiLyo7xOdgXg1CJnNZgBfzLaMGDFCGW9vb1dmYsxmM5xOJzo6OlSzQu3t7Zg6dapSc+LEiX77P3nypGo/tbW1qu0dHR1wuVyqmr7ZoQtfB+g/a9XHaDSqPkrro9fr+YeSyI+43W5UV1ejsrISoaGhSElJQWBgoK/bIiIv8fSc7dX7CMXHx8NsNqummZ1OJyoqKpSQk5SUBL1er6ppbW1FY2OjUjNlyhTYbDbs379fqamtrYXNZlPVNDY2orW1VakpKyuD0WhEUlKSUlNZWam6pL6srAwWi6XfR2ZEpB3FxcUYO3YsUlNTsWnTJqSmpmLs2LEoLi72dWtENNgudxX22bNn5f3335f3339fAMimTZvk/fffl6amJhER2bBhg5hMJikuLpaGhgaZP3++jBgxQux2u7KPRx99VGJjY2XPnj1y8OBBmTlzpkycOFF6enqUmtmzZ8utt94qVqtVrFarJCYmSlZWlrK9p6dHEhISZNasWXLw4EHZs2ePxMbGSl5enlJz5swZiYmJkfnz50tDQ4MUFxdLeHi4bNy40ePj5VVjRP5l+/btotPpJDs7W/bt2yfbtm2Tffv2SXZ2tuh0Otm+fbuvWyQiL/D0/H3ZQWjv3r0CoN/PwoULReSLS+hXr14tZrNZjEajTJ8+XRoaGlT7OHfunOTl5UlkZKQEBwdLVlaWNDc3q2pOnTolOTk5EhYWJmFhYZKTkyMdHR2qmqamJsnMzJTg4GCJjIyUvLw81aXyIiIffPCBTJs2TYxGo5jNZlmzZo3Hl86LMAgR+ZOenh4ZPXq0ZGdni9vtVl0+73a7JTs7W+Lj41X/U0ZEQ5On52+dCG+z/FXsdjtMJhNsNhsXSxMNce+9955ya43JkyfD5XKhpKQEGRkZ0Ov1sFqtmDp1Kvbu3YsZM2b4ul0i+gY8PX/zu8aISDP61hQmJCRccnvf+IVrD4nIvzEIEZFm9F3N2tjYeMntfeMXXvVKRP6NQYiINGPatGkYPXo01q9fj97eXtW23t5eFBYWIj4+HtOmTfNRh0Q02BiEiEgzAgMD8cILL2DXrl2YO3cuampqcO7cOdTU1GDu3LnYtWsXNm7cyPsJEWnIFbmzNBHR1Wre
vHl4++23UVBQgOnTpyvj8fHxePvttzFv3jwfdkdEg41XjQ2AV40R+Se32429e/di9+7duPvuu3lnaSI/4+n5mzNCRKRJgYGBSE5ORldXF5KTkxmCiDSKa4SIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISISJPcbjcqKipQWVmJiooKuN1uX7dERD7AIEREmlNcXIyxY8ciNTUVmzZtQmpqKsaOHYvi4mJft0ZEg4xBiIg0pbi4GPfeey8SExOxb98+bNu2Dfv27UNiYiLuvfdehiEijeFXbAyAX7FB5D/cbjfGjh2LxMRE7Ny5E263GyUlJcjIyEBgYCDmzp2LxsZGHDlyhHeaJhriPD1/c0aIiDRj3759OHbsGJ555hmIiGqNkIhg1apVOHr0KPbt2+frVolokDAIEZFmtLa2AgA++eSTS64R+uc//6mqIyL/xyBERJoxYsQIAMADDzxwyTVCDzzwgKqOiPwf1wgNgGuEiPyH0+lEaGgooqKicPz4cYiIskZIp9MhNjYWp06dQldXFwwGg6/bJaJvgGuEiIguUl1djZ6eHrS3t2PevHmoqanBuXPnUFNTg3nz5qG9vR09PT2orq72datENEgYhIhIM/rW/rzxxhtoaGjA9OnTMX/+fEyfPh2NjY144403VHVE5P8YhIhIM/rW/txwww34+OOPUV5ejvz8fJSXl+PIkSMYM2aMqo6I/B/XCA2Aa4SI/AfvI0SkHVwjRER0kcDAQLzwwgvYtWsX5s6dq1ojNHfuXOzatQsbN25kCCLSkCBfN0BENJjmzZuHt99+GwUFBZg+fboyHh8fj7fffhvz5s3zYXdENNj40dgA+NEYkX9yu93Yu3cvdu/ejbvvvhspKSmcCSLyI56evzkjRESaFBgYiOTkZHR1dSE5OZkhiEijuEaIiIiINItBiIiIiDSLQYiIiIg0i0GIiIiINItBiIiIiDSLQYiIiIg0i0GIiIiINMvrQainpwf//u//jvj4eAQHB2PMmDFYt24dent7lRoRwZo1a2CxWBAcHIwZM2bgww8/VO3H4XBg6dKliI6ORmhoKObMmYPjx4+rajo6OpCbmwuTyQSTyYTc3FycOXNGVdPc3Izs7GyEhoYiOjoay5Ytg9Pp9PZhE9EQ43a7UVFRgcrKSlRUVMDtdvu6JSLyBfGyZ599VqKiomTXrl1y9OhR+f3vfy/Dhg2TzZs3KzUbNmyQsLAw2b59uzQ0NMgPfvADGTFihNjtdqXm0UcflZEjR0p5ebkcPHhQUlJSZOLEidLT06PUzJ49WxISEqS6ulqqq6slISFBsrKylO09PT2SkJAgKSkpcvDgQSkvLxeLxSJ5eXkeH4/NZhMAYrPZvuFvhoiuFtu3b5fRo0cLAOVn9OjRsn37dl+3RkRe4un52+tBKDMzUx5++GHV2Lx58+SBBx4QEZHe3l4xm82yYcMGZfv58+fFZDLJK6+8IiIiZ86cEb1eL0VFRUpNS0uLBAQESGlpqYiIHDp0SABITU2NUmO1WgWAHD58WERESkpKJCAgQFpaWpSabdu2idFo9DjYMAgR+Zft27eLTqeT7Oxs2bdvn2zbtk327dsn2dnZotPpGIaI/ISn52+vf8XGd77zHbzyyiv4xz/+gRtvvBH/+7//i6qqKmzevBkAcPToUbS1tSEtLU15jtFoRHJyMqqrq7F48WLU1dXB5XKpaiwWCxISElBdXY309HRYrVaYTCZMmjRJqZk8eTJMJhOqq6sxfvx4WK1WJCQkwGKxKDXp6elwOByoq6tDSkpKv/4dDgccDofy2G63AwBcLhdcLpfXfk9ENPjcbjcKCgqQkZGB3//+93C73Th16hRuv/12/P73v8f3vvc9rFixAhkZGfzKDaIhztNztteD0FNPPQWbzYabbroJgYGBcLvd+MlPfoL58+cDANra2gAAMTExqufFxMSgqalJqTEYDIiIiOhX0/f8trY2DB8+
vN/rDx8+XFVz8etERETAYDAoNRcrLCzE2rVr+42XlZUhJCRkwOMnoqtXQ0MDjh07hn/9139FaWmpMl5eXg4AmDZtGv785z9j48aNSExM9FWbROQF3d3dHtV5PQj97ne/w29/+1u89dZbuOWWW1BfX4/ly5fDYrFg4cKFSp1Op1M9T0T6jV3s4ppL1X+dmgutWrUK+fn5ymO73Y64uDikpaXx2+eJhri+Gd5FixZh2LBhcLlcKC8vR2pqKvR6PaZNm4ann34a119/PTIyMnzcLRF9E33v94F4PQg9+eSTePrpp3H//fcDABITE9HU1ITCwkIsXLgQZrMZwBezNSNGjFCe197erszemM1mOJ1OdHR0qGaF2tvbMXXqVKXmxIkT/V7/5MmTqv3U1taqtnd0dMDlcvWbKepjNBphNBr7jev1euj1eo9/D0R09YmLiwMAfPTRR5g8ebIy3vf+/uijj5Q6vt+JhjZP38Nev3y+u7sbAQHq3QYGBiqXz8fHx8NsNitT0QDgdDpRUVGhhJykpCTo9XpVTWtrKxobG5WaKVOmwGazYf/+/UpNbW0tbDabqqaxsRGtra1KTVlZGYxGI5KSkrx85ER0tZs2bRpGjx6N9evXq27pAQC9vb0oLCxEfHw8pk2b5qMOiWjQeXuV9sKFC2XkyJHK5fPFxcUSHR0tK1euVGo2bNggJpNJiouLpaGhQebPn3/Jy+djY2Nlz549cvDgQZk5c+YlL5+/9dZbxWq1itVqlcTExEtePj9r1iw5ePCg7NmzR2JjY3n5PJGGXXjVWGVlpWzbtk0qKyt51RiRn/HZ5fN2u10ef/xxGTVqlFxzzTUyZswY+bd/+zdxOBxKTW9vr6xevVrMZrMYjUaZPn26NDQ0qPZz7tw5ycvLk8jISAkODpasrCxpbm5W1Zw6dUpycnIkLCxMwsLCJCcnRzo6OlQ1TU1NkpmZKcHBwRIZGSl5eXly/vx5j4+HQYjI/1zqPkLx8fEMQUR+xNPzt05ExIcTUlc9u90Ok8kEm83GxdJEfsTtdmPv3r3YvXs37r77bqSkpPCSeSI/4un52+uLpYmIhoLAwEAkJyejq6sLycnJDEFEGsUvXSUiIiLNYhAiIiIizWIQIiIiIs1iECIiIiLNYhAiIiIizWIQIiIiIs1iECIiIiLNYhAiIiIizWIQIiIiIs1iECIiIiLNYhAiIiIizWIQIiJNOnfuHJYtW4Y1a9Zg2bJlOHfunK9bIiIf4LfPD4DfPk/kf+bOnYs//OEP/cbvuece7Ny5c/AbIiKv8/T8zRkhItKUvhBkMBiwcuVKvPzyy1i5ciUMBgP+8Ic/YO7cub5ukYgGEWeEBsAZISL/ce7cOYSEhMBgMODs2bPQ6XQoKSlBRkYGRARhYWFwOp3o7u5GcHCwr9slom+AM0JERBd58sknAQD5+fkwGAyqbQaDAcuXL1fVEZH/YxAiIs04cuQIAOBf/uVfLrn9kUceUdURkf9jECIizRg3bhwA4Be/+AVOnz6Nb33rW8jNzcW3vvUtnD59Gr/85S9VdUTk/7hGaABcI0TkP/rWCA2Ea4SIhj6uESIiukhwcHC/tUEXMxgMDEFEGsIgRESacfr0aTidzq+scTqdOH369CB1RES+xiBERJpx5513Kv+OjIzE9OnTccstt2D69OmIjIy8ZB0R+bcgXzdARDRY+q4G0+l0OHHiBEREuY+QTqeDwWCAiPCqMSIN4YwQEWlGb28vAMBsNiMoSP3/gUFBQYiJiVHVEZH/YxAiIs2IiooCALS2tqKzs1O1rbOzE21tbao6IvJ/DEJEpBl9d44GgLCwMCQmJuL1119HYmIiwsLCLllHRP6N9xEaAO8jROQ/nE4njEbjgHUOh2PAy+yJ6OrG+wgREV3EYDDgnnvu+cqae+65hyGISEMYhIhIM9xuN6xW61fWWK1WuN3uQeqIiHyNQYiINOO9995De3s7gC9mh2bOnInp06dj5syZyixQe3s7
3nvvPR92SUSDifcRIiLNKCsrAwDo9XqcPXsWOp1OuY+QiGDYsGFwuVwoKyvDrFmzfNwtEQ0GzggRkWbs2bMHAPC9730PgYGBqKioQGVlJSoqKhAYGIjvfve7qjoi8n+cESIizdDpdACAAwcOYMyYMWhubgYAbNq0CaNGjUJgYKCqjoj8H4MQEWnGt7/9bdTV1eGTTz5BQIB6Qvz48ePKHaW//e1v+6I9IvIB3kdoALyPEJH/6OzsVN048cucPXsWw4YNG4SOiOhK8el9hFpaWvDAAw8gKioKISEh+Na3voW6ujplu4hgzZo1sFgsCA4OxowZM/Dhhx+q9uFwOLB06VJER0cjNDQUc+bMwfHjx1U1HR0dyM3NhclkgslkQm5uLs6cOaOqaW5uRnZ2NkJDQxEdHY1ly5bB6XReicMmoqtcbW2tV+uIaOjzehDq6OjAnXfeCb1ej927d+PQoUN44YUXcO211yo1zz33HDZt2oQtW7bgwIEDMJvNSE1NxdmzZ5Wa5cuXY8eOHSgqKkJVVRU6OzuRlZWlur/HggULUF9fj9LSUpSWlqK+vh65ubnKdrfbjczMTHR1daGqqgpFRUXYvn07CgoKvH3YRDQEeHpZPC+fJ9IQ8bKnnnpKvvOd73zp9t7eXjGbzbJhwwZl7Pz582IymeSVV14REZEzZ86IXq+XoqIipaalpUUCAgKktLRUREQOHTokAKSmpkapsVqtAkAOHz4sIiIlJSUSEBAgLS0tSs22bdvEaDSKzWbz6HhsNpsA8LieiK5e+fn5AkAAyDXXXKP8++LH+fn5vm6ViL4hT8/fXl8s/cc//hHp6en4/ve/j4qKCowcORJLlizBokWLAABHjx5FW1sb0tLSlOcYjUYkJyejuroaixcvRl1dHVwul6rGYrEgISEB1dXVSE9Ph9VqhclkwqRJk5SayZMnw2Qyobq6GuPHj4fVakVCQgIsFotSk56eDofDgbq6OqSkpPTr3+FwwOFwKI/tdjsAwOVyweVyee8XRUSDbvfu3cq/z58/r9p24ePdu3djw4YNg9YXEXmfp+dsrwehf/7zn3j55ZeRn5+PZ555Bvv378eyZctgNBrx4IMPoq2tDQAQExOjel5MTAyampoAAG1tbTAYDIiIiOhX0/f8trY2DB8+vN/rDx8+XFVz8etERETAYDAoNRcrLCzE2rVr+42XlZUhJCTEk18BEV2lLl5n+FV1JSUlV7gbIrqSuru7ParzehDq7e3FHXfcgfXr1wMAbrvtNnz44Yd4+eWX8eCDDyp1F9+nQ0QGvHfHxTWXqv86NRdatWoV8vPzlcd2ux1xcXFIS0vjVWNEQ1xYWJhqLeJX1WVkZAxCR0R0pfR9ojMQrwehESNG4Oabb1aNTZgwAdu3bwcAmM1mAF/M1owYMUKpaW9vV2ZvzGYznE4nOjo6VLNC7e3tmDp1qlJz4sSJfq9/8uRJ1X4uvvqjo6MDLper30xRH6PRCKPR2G9cr9dDr9d/9cET0VVt7Nix+OyzzwAAgYGBqosvLnw8duxYvt+JhjhP38Nev2rszjvvxEcffaQa+8c//oHrr78eABAfHw+z2Yzy8nJlu9PpREVFhRJykpKSoNfrVTWtra1obGxUaqZMmQKbzYb9+/crNbW1tbDZbKqaxsZGtLa2KjVlZWUwGo1ISkry8pET0dXu1KlTyr8v/ob5Cx9fWEdEfs7bq7T3798vQUFB8pOf/ESOHDkib775poSEhMhvf/tbpWbDhg1iMpmkuLhYGhoaZP78+TJixAix2+1KzaOPPiqxsbGyZ88eOXjwoMycOVMmTpwoPT09Ss3s2bPl1ltvFavVKlarVRITEyUrK0vZ3tPTIwkJCTJr1iw5ePCg7NmzR2JjYyUvL8/j4+FVY0T+Y8qUKaorxb7sZ8qUKb5ulYi+IU/P314PQiIif/rTnyQhIUGMRqPcdNNN8vOf/1y1vbe3V1avXi1ms1mMRqNMnz5dGhoaVDXnzp2T
vLw8iYyMlODgYMnKypLm5mZVzalTpyQnJ0fCwsIkLCxMcnJypKOjQ1XT1NQkmZmZEhwcLJGRkZKXlyfnz5/3+FgYhIj8x0MPPeRREHrooYd83SoRfUOenr/5FRsD4FdsEPmPpKQkHDx4cMC622+/XXU3fCIaenz6FRtERFej9vZ2r9YR0dDHIEREmtF3xZi36oho6GMQIiLN6O3t9WodEQ19DEJERESkWQxCRKQZJpPJq3VENPQxCBGRZlx4N3tv1BHR0McgRESacfEXOX/TOiIa+hiEiEgzJk6c6NU6Ihr6GISISDNuvfVWr9YR0dDHO0sPgHeWJvIfo0aNwqeffjpgXVxcHJqbmwehIyK6UnhnaSKii/CGikR0MQYhItIMt9vt1ToiGvoYhIhIMwICPPuT52kdEQ19fLcTERGRZjEIEZFm8LvGiOhiDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFlXPAgVFhZCp9Nh+fLlypiIYM2aNbBYLAgODsaMGTPw4Ycfqp7ncDiwdOlSREdHIzQ0FHPmzMHx48dVNR0dHcjNzYXJZILJZEJubi7OnDmjqmlubkZ2djZCQ0MRHR2NZcuWwel0XqnDJSIioiHkigahAwcO4Oc//zluvfVW1fhzzz2HTZs2YcuWLThw4ADMZjNSU1Nx9uxZpWb58uXYsWMHioqKUFVVhc7OTmRlZcHtdis1CxYsQH19PUpLS1FaWor6+nrk5uYq291uNzIzM9HV1YWqqioUFRVh+/btKCgouJKHTUREREOFXCFnz56VcePGSXl5uSQnJ8vjjz8uIiK9vb1iNptlw4YNSu358+fFZDLJK6+8IiIiZ86cEb1eL0VFRUpNS0uLBAQESGlpqYiIHDp0SABITU2NUmO1WgWAHD58WERESkpKJCAgQFpaWpSabdu2idFoFJvN5tFx2Gw2AeBxPRFdvQB4/ENEQ5un5++gKxWwHnvsMWRmZuKuu+7Cs88+q4wfPXoUbW1tSEtLU8aMRiOSk5NRXV2NxYsXo66uDi6XS1VjsViQkJCA6upqpKenw2q1wmQyYdKkSUrN5MmTYTKZUF1djfHjx8NqtSIhIQEWi0WpSU9Ph8PhQF1dHVJSUvr17XA44HA4lMd2ux0A4HK54HK5vPPLIaKrHt/vREObp+/hKxKEioqKcPDgQRw4cKDftra2NgBATEyMajwmJgZNTU1KjcFgQERERL+avue3tbVh+PDh/fY/fPhwVc3FrxMREQGDwaDUXKywsBBr167tN15WVoaQkJBLPoeI/E9JSYmvWyCib6C7u9ujOq8HoU8//RSPP/44ysrKcM0113xpnU6nUz0WkX5jF7u45lL1X6fmQqtWrUJ+fr7y2G63Iy4uDmlpaQgPD//K/ojIf2RkZPi6BSL6Bvo+0RmI14NQXV0d2tvbkZSUpIy53W5UVlZiy5Yt+OijjwB8MVszYsQIpaa9vV2ZvTGbzXA6nejo6FDNCrW3t2Pq1KlKzYkTJ/q9/smTJ1X7qa2tVW3v6OiAy+XqN1PUx2g0wmg09hvX6/XQ6/Ue/Q6IaOjj+51oaPP0Pez1q8ZmzZqFhoYG1NfXKz933HEHcnJyUF9fjzFjxsBsNqO8vFx5jtPpREVFhRJykpKSoNfrVTWtra1obGxUaqZMmQKbzYb9+/crNbW1tbDZbKqaxsZGtLa2KjVlZWUwGo2qoEZERETa5PUZobCwMCQkJKjGQkNDERUVpYwvX74c69evx7hx4zBu3DisX78eISEhWLBgAQDAZDLhkUceQUFBAaKiohAZGYkVK1YgMTERd911FwBgwoQJmD17NhYtWoRXX30VAPCjH/0IWVlZ
GD9+PAAgLS0NN998M3Jzc/H888/j9OnTWLFiBRYtWsSPuYiIiOjKLJYeyMqVK3Hu3DksWbIEHR0dmDRpEsrKyhAWFqbUvPjiiwgKCsJ9992Hc+fOYdasWfj1r3+NwMBApebNN9/EsmXLlKvL5syZgy1btijbAwMD8ec//xlLlizBnXfeieDgYCxYsAAbN24cvIMlIiKiq5ZORMTXTVzN7HY7TCYTbDYbZ5GIhriBLsi4EP80Eg1tnp6/+V1jREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWQxCREREpFkMQkRERKRZDEJERESkWUG+boCI6HJ0d3fj8OHDV/x1Dh48+LWed9NNNyEkJMTL3RDRlcIgRERDyuHDh5GUlHTFX+frvkZdXR1uv/12L3dDRFcKgxARDSk33XQT6urqvtZzLyfcfN3XuOmmm77W84jIR8TL1q9fL3fccYcMGzZMrrvuOrnnnnvk8OHDqpre3l5ZvXq1jBgxQq655hpJTk6WxsZGVc358+clLy9PoqKiJCQkRLKzs+XTTz9V1Zw+fVoeeOABCQ8Pl/DwcHnggQeko6NDVdPU1CRZWVkSEhIiUVFRsnTpUnE4HB4fj81mEwBis9ku7xdBRFedjRs3CoABfzZu3OjrVonoG/L0/O31xdIVFRV47LHHUFNTg/LycvT09CAtLQ1dXV1KzXPPPYdNmzZhy5YtOHDgAMxmM1JTU3H27FmlZvny5dixYweKiopQVVWFzs5OZGVlwe12KzULFixAfX09SktLUVpaivr6euTm5irb3W43MjMz0dXVhaqqKhQVFWH79u0oKCjw9mET0RDg6XuffyOINORKJ7L29nYBIBUVFSLyxWyQ2WyWDRs2KDXnz58Xk8kkr7zyioiInDlzRvR6vRQVFSk1LS0tEhAQIKWlpSIicujQIQEgNTU1So3VahUAygxUSUmJBAQESEtLi1Kzbds2MRqNHs/wcEaIyP/gK2aDiMg/eHr+vuJrhGw2GwAgMjISAHD06FG0tbUhLS1NqTEajUhOTkZ1dTUWL16Muro6uFwuVY3FYkFCQgKqq6uRnp4Oq9UKk8mESZMmKTWTJ0+GyWRCdXU1xo8fD6vVioSEBFgsFqUmPT0dDocDdXV1SElJ6devw+GAw+FQHtvtdgCAy+WCy+Xy0m+FiHzJ6XTixRdfxFNPPaWM/fSnP8UTTzzB9zmRn/D0vXxFg5CIID8/H9/5zneQkJAAAGhrawMAxMTEqGpjYmLQ1NSk1BgMBkRERPSr6Xt+W1sbhg8f3u81hw8frqq5+HUiIiJgMBiUmosVFhZi7dq1/cbLysp4SSyRHxk/fjxe+u1ObGwIworEHsQNA0pKSnzdFhF5SXd3t0d1VzQI5eXl4YMPPkBVVVW/bTqdTvVYRPqNXezimkvVf52aC61atQr5+fnKY7vdjri4OKSlpSE8PPwr+yOioeV/m08DDX/D5MmTMXFUpK/bISIv6vtEZyBXLAgtXboUf/zjH1FZWYnY2Fhl3Gw2A/hitmbEiBHKeHt7uzJ7Yzab4XQ60dHRoZoVam9vx9SpU5WaEydO9HvdkydPqvZTW1ur2t7R0QGXy9VvpqiP0WiE0WjsN67X66HX6z06diIaGoKCgpT/8v1N5F88fU97/aoxEUFeXh6Ki4vx7rvvIj4+XrU9Pj4eZrMZ5eXlypjT6URFRYUScpKSkqDX61U1ra2taGxsVGqmTJkCm82G/fv3KzW1tbWw2WyqmsbGRrS2
tio1ZWVlMBqNg3JDNiIiIrq6eX1G6LHHHsNbb72FP/zhDwgLC1PW4phMJgQHB0On02H58uVYv349xo0bh3HjxmH9+vUICQnBggULlNpHHnkEBQUFiIqKQmRkJFasWIHExETcddddAIAJEyZg9uzZWLRoEV599VUAwI9+9CNkZWVh/PjxAIC0tDTcfPPNyM3NxfPPP4/Tp09jxYoVWLRoET/mIiIiIu9fK4ovuST1tddeU2r6bqhoNpvFaDTK9OnTpaGhQbWfc+fOSV5enkRGRkpwcLBkZWVJc3OzqubUqVOSk5MjYWFhEhYWJjk5OZe8oWJmZqYEBwdLZGSk5OXlyfnz5z0+Hl4+T+S/3j/2uVz/1C55/9jnvm6FiLzM0/O3TkTEdzHs6me322EymWCz2TiLRORn6ptOYe7LNdj5r5PxreujfN0OEXmRp+dvr68RIiIiIhoqGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLOu2LfPExFd7OjnXehy9Pi6DcUnJ7uU//Z9E/3VINQYhPjoUF+3QaQJV887n4j82tHPu5Cy8T1ft3FJBW83+LqFfvaumMEwRDQIGISIaFD0zQRt/sG3MHb4MB9384Wucw7ses+KrBlTEBps9HU7AICP2zux/Hf1V9XMGZE/YxAiokE1dvgwJIw0+boNAIDL5ULbdcDt10dAr9f7uh0i8gEuliYiIiLNYhAiIiIizWIQIiIiIs1iECIiIiLN4mJpIho0uiA7jto/QsA1V8dVYz09Pfis5zP8/fTfr5r7CB21d0IXZPd1G0SacXW884lIE/TX1uKZ/et93UY/W0u3+roFFf21swBk+LoNIk1gECKiQeM6MwkvZC7ADVfJfYR6enrw16q/4s7v3HnVzAh90t6JZW9+4us2iDTj6njnE5EmSE844sPH4+aoq+c+QkeDjmJC5ISr5j5CvedtkJ6Tvm6DSDO4WJqIiIg0i0GIiIiINItBiIiIiDSLa4SIaFCcc7kBAI0tNh938v/rOufA304C5qaOq+pLV4lo8DAIEdGg+OT/TvBPFzf4uJOLBeGNjw/4uol+Qo3880w0GPhOI6JBkXaLGQBww/BhCNYH+ribL3zUakPB2w144d5EjB9xdVzJBnwRguKjQ33dBpEmMAgR0aCIDDXg/v9vlK/bUOnp6QEA3HBdKBJGXj1BiIgGDxdLExERkWYxCBEREZFmMQgRERGRZjEIERERkWYxCBEREZFmMQgRERGRZjEIERERkWYxCBEREZFmaeKGilu3bsXzzz+P1tZW3HLLLdi8eTOmTZvm67aI6Gvo7u7G4cOHvbKvj1rPwNH2Mf7eGIzeU9d6ZZ833XQTQkJCvLIvIrry/D4I/e53v8Py5cuxdetW3HnnnXj11Vdx991349ChQxg16uq6yy0RDezw4cNISkry6j4XvO69fdXV1eH222/33g6J6IrSiYj4uokradKkSbj99tvx8ssvK2MTJkzA3LlzUVhYOODz7XY7TCYTbDYbwsPDr2SrROQBb84IdZ5z4M97rchMmYJhXvr2ec4IEV0dPD1/+/WMkNPpRF1dHZ5++mnVeFpaGqqrqy/5HIfDAYfDoTy22+0AAJfLBZfLdeWaJSKP6PV6JCYmemVfLpcLHZ+3447bJkKv13tln337JSLf8vR96NdB6PPPP4fb7UZMTIxqPCYmBm1tbZd8TmFhIdauXdtvvKysjP+XR+SnysvLfd0CEXlZd3e3R3V+HYT66HQ61WMR6TfWZ9WqVcjPz1ce2+12xMXFIS0tjR+NEfkZl8uF8vJypKamenVGiIh8r+8TnYH4dRCKjo5GYGBgv9mf9vb2frNEfYxGI4zG/msF9Ho9/1AS+Sm+v4n8j6fvab++j5DBYEBSUlK/ae/y8nJMnTrVR10RERHR1cKvZ4QAID8/H7m5ubjjjjswZcoU/PznP0dzczMeffRRX7dG
REREPub3QegHP/gBTp06hXXr1qG1tRUJCQkoKSnB9ddf7+vWiIiIyMf8PggBwJIlS7BkyRJft0FERERXGb9eI0RERET0VRiEiIiISLMYhIiIiEizGISIiIhIsxiEiIiISLMYhIiIiEizNHH5/DchIgA8/84SIho6XC4Xuru7Ybfb+RUbRH6m77zddx7/MgxCAzh79iwAIC4uzsedEBER0eU6e/YsTCbTl27XyUBRSeN6e3vx2WefISws7Eu/sZ6Ihia73Y64uDh8+umnCA8P93U7RORFIoKzZ8/CYrEgIODLVwIxCBGRZtntdphMJthsNgYhIo3iYmkiIiLSLAYhIiIi0iwGISLSLKPRiNWrV8NoNPq6FSLyEa4RIiIiIs3ijBARERFpFoMQERERaRaDEBEREWkWgxAR0Td07Ngx6HQ61NfXe33fM2bMwPLly72+3y/z61//Gtdee+2gvR6RrzEIEWnAD3/4Q+h0un4/s2fPBgCMHj0aOp0ORUVF/Z57yy23QKfT4de//rUyNnr0aGzevNmj176cWl/o6urCU089hTFjxuCaa67BddddhxkzZmDXrl2+bg0AUFxcjB//+MfK40v9PhleiL4+ftcYkUbMnj0br732mmrswsvG4+Li8Nprr+H+++9XxmpqatDW1obQ0NBB63OwPfroo9i/fz+2bNmCm2++GadOnUJ1dTVOnTrl69YAAJGRkb5ugcivcUaISCOMRiPMZrPqJyIiQtmek5ODiooKfPrpp8rYr371K+Tk5CAo6Mr9P9PLL7+MG264AQaDAePHj8cbb7yh2r5p0yYkJiYiNDQUcXFxWLJkCTo7O5XtfbMhf/nLXzBhwgQMGzYMs2fPRmtrq0ev/6c//QnPPPMMMjIyMHr0aCQlJWHp0qVYuHChUqPT6bBz507V86699lrVLBkAHD58GFOnTsU111yDW265Be+9956y7b333oNOp8Nf/vIX3HbbbQgODsbMmTPR3t6O3bt3Y8KECQgPD8f8+fPR3d2tPO/Cj8ZmzJiBpqYmPPHEE8qs3nvvvYeHHnoINptNGVuzZg0AwOl0YuXKlRg5ciRCQ0MxadIkVU99v79Ro0YhJCQE3/3ud6+aAEg0WBiEiAgAEBMTg/T0dLz++usAgO7ubvzud7/Dww8/fMVec8eOHXj88cdRUFCAxsZGLF68GA899BD27t2r1AQEBOC///u/0djYiNdffx3vvvsuVq5cqdpPd3c3Nm7ciDfeeAOVlZVobm7GihUrPOrBbDajpKQEZ8+e/cbH8+STT6KgoADvv/8+pk6dijlz5vQLFmvWrMGWLVtQXV2NTz/9FPfddx82b96Mt956C3/+859RXl6Ol1566ZL7Ly4uRmxsLNatW4fW1la0trZi6tSp2Lx5M8LDw5WxvmN/6KGH8Ne//hVFRUX44IMP8P3vfx+zZ8/GkSNHAAC1tbV4+OGHsWTJEtTX1yMlJQXPPvvsN/49EA0pQkR+b+HChRIYGCihoaGqn3Xr1omIyPXXXy8vvvii7Ny5U2644Qbp7e2V119/XW677TYRETGZTPLaa68p++ur98RX1U6dOlUWLVqkGvv+978vGRkZX7q///mf/5GoqCjl8WuvvSYA5OOPP1bGfvazn0lMTIxH/VVUVEhsbKzo9Xq54447ZPny5VJVVaWqASA7duxQjV34Ozl69KgAkA0bNijbXS6XxMbGyk9/+lMREdm7d68AkD179ig1hYWFAkA++eQTZWzx4sWSnp6uPE5OTpbHH39ceXyp3+drr70mJpNJNfbxxx+LTqeTlpYW1fisWbNk1apVIiIyf/58mT17tmr7D37wg377IvJnnBEi0oiUlBTU19erfh577DFVTWZmJjo7O1FZWYlf/epXV3Q2CAD+/ve/484771SN3Xnnnfj73/+uPN67dy9SU1MxcuRIhIWF4cEHH8SpU6fQ1dWl1ISEhOCGG25QHo8YMQLt7e0e9TB9+nT885//xDvvvIPv
fe97+PDDDzFt2jTVAmVPTZkyRfl3UFAQ7rjjDtWxAMCtt96q/DsmJgYhISEYM2aMaszT3r/KwYMHISK48cYbMWzYMOWnoqICn3zyCYAvfv8X9nzxMRBpARdLE2lEaGgoxo4d+5U1QUFByM3NxerVq1FbW4sdO3Zc8b50Op3qsYgoY01NTcjIyMCjjz6KH//4x4iMjERVVRUeeeQRuFwu5Tl6vb7fPuUyvj1Ir9dj2rRpmDZtGp5++mk8++yzWLduHZ566ikYDIZL7u/C17+c47uwV51Od8nee3t7Pe79y/T29iIwMBB1dXUIDAxUbRs2bBgAXNbviMhfcUaIiFQefvhhVFRU4J577lEtpr4SJkyYgKqqKtVYdXU1JkyYAAD429/+hp6eHrzwwguYPHkybrzxRnz22WdXtCcAuPnmm9HT04Pz588DAK677jrV4usjR46oFjT3qampUf7d09ODuro63HTTTV7tzWAwwO12Dzh22223we12o729HWPHjlX9mM1mAF8c54U9X3wMRFrAGSEijXA4HGhra1ONBQUFITo6WjU2YcIEfP755wgJCfHaa7e0tPS72eCoUaPw5JNP4r777sPtt9+OWbNm4U9/+hOKi4uxZ88eAMANN9yAnp4evPTSS8jOzsZf//pXvPLKK17rC/jiSqz58+fjjjvuQFRUFA4dOoRnnnkGKSkpCA8PBwDMnDkTW7ZsweTJk9Hb24unnnqq30wOAPzsZz/DuHHjMGHCBLz44ovo6Ojw+seLo0ePRmVlJe6//34YjUZER0dj9OjR6OzsxDvvvIOJEyciJCQEN954I3JycvDggw/ihRdewG233YbPP/8c7777LhITE5GRkYFly5Zh6tSpeO655zB37lyUlZWhtLTUq/0SXfV8ukKJiAbFwoULBUC/n/Hjx4vIwIufv+li6Uu9dt/+tm7dKmPGjBG9Xi833nij/OY3v1E9f9OmTTJixAgJDg6W9PR0+c1vfiMApKOjQ0QuvVB4x44d4umft/Xr18uUKVMkMjJSrrnmGhkzZowsW7ZMPv/8c6WmpaVF0tLSJDQ0VMaNGyclJSWXXCz91ltvyaRJk8RgMMiECRPknXfeUfbRt1i6r+8v63316tUyceJE5fHFi6WtVqvceuutYjQaVcf46KOPSlRUlACQ1atXi4iI0+mU//zP/5TRo0eLXq8Xs9ks3/3ud+WDDz5QnvfLX/5SYmNjJTg4WLKzs2Xjxo1cLE2aohPhh8RERESkTVwjRERERJrFIEREX9ubb76pujT7wp9bbrnF1+0BwJf2N2zYMOzbt8/X7RGRj/GjMSL62s6ePYsTJ05ccpter8f1118/yB319/HHH3/ptpEjRyI4OHgQuyGiqw2DEBEREWkWPxojIiIizWIQIiIiIs1iECIiIiLNYhAiIiIizWIQIiIiIs1iECIiIiLNYhAiIiIizWIQIiIiIs36fwW4kpmxoLe4AAAAAElFTkSuQmCC"
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Preview the raw values. display() is required here because a notebook cell only renders\n",
    "# its last expression, so a bare .head(10) on the first line would be silently discarded.\n",
    "display(data['EMI_Loan_Submitted'].head(10))\n",
    "# Coerce the column to numeric; entries that cannot be parsed become NaN (errors='coerce').\n",
    "data['EMI_Loan_Submitted'] = pd.to_numeric(data['EMI_Loan_Submitted'], errors='coerce')\n",
    "# Box plot of the converted column.\n",
    "data.boxplot(column=['EMI_Loan_Submitted'], return_type='axes')"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:28:36.294246Z",
     "start_time": "2025-07-16T06:28:36.155210Z"
    }
   },
   "id": "413232a44be80089",
   "execution_count": 26
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   EMI_Loan_Submitted  EMI_Loan_Submitted_Missing\n0                 NaN                           1\n1             6762.90                           0\n2                 NaN                           1\n3                 NaN                           1\n4                 NaN                           1\n5             6978.92                           0\n6                 NaN                           1\n7                 NaN                           1\n8            30824.65                           0\n9            10883.38                           0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>EMI_Loan_Submitted</th>\n      <th>EMI_Loan_Submitted_Missing</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>6762.90</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>6978.92</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>30824.65</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>10883.38</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# EMI_Loan_Submitted appears to have many missing values, so add a new 0/1 indicator\n",
    "# column: 1 where the value is missing, 0 where it is present.\n",
    "# isnull() is vectorized; astype('int64') keeps the same integer dtype the lambda produced.\n",
    "data['EMI_Loan_Submitted_Missing'] = data['EMI_Loan_Submitted'].isnull().astype('int64')\n",
    "data[['EMI_Loan_Submitted', 'EMI_Loan_Submitted_Missing']].head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:29:12.780095Z",
     "start_time": "2025-07-16T06:29:12.702482Z"
    }
   },
   "id": "14b400f2747156d4",
   "execution_count": 27
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 数值型特征的缺失值处理"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "88d41a3361e9e4ce"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "count    1.246260e+05\nmean     3.636342e+03\nstd      3.369124e+04\nmin      0.000000e+00\n25%      0.000000e+00\n50%      0.000000e+00\n75%      3.500000e+03\nmax      1.000000e+07\nName: Existing_EMI, dtype: float64"
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Summary statistics (count, mean, std, quartiles, min/max) for Existing_EMI.\n",
    "data.loc[:, 'Existing_EMI'].describe()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:31:27.667290Z",
     "start_time": "2025-07-16T06:31:27.656155Z"
    }
   },
   "id": "160f18e75a0d80fd",
   "execution_count": 28
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Replace missing Existing_EMI values with 0.\n",
    "# The result is assigned back to the column instead of calling fillna(inplace=True) on the\n",
    "# selected Series: that form is chained assignment, which pandas 2.2 deprecates and which\n",
    "# leaves `data` unchanged when Copy-on-Write is enabled.\n",
    "data['Existing_EMI'] = data['Existing_EMI'].fillna(0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:33:07.141608Z",
     "start_time": "2025-07-16T06:33:07.134583Z"
    }
   },
   "id": "8f232b5dbba8e5a3",
   "execution_count": 30
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "<Axes: >"
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": "<Figure size 640x480 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAh8AAAGdCAYAAACyzRGfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy81sbWrAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAh90lEQVR4nO3de3DTdb7/8VewaWihodbSm9SCiCIKHrkc4YgW3KVSELm4zOwy48K6c8YdEQfQERdXt1W5iKviZZa9OQX1MDCKsKJY23W3oIugdvFYdekqw22liKBtSgtNSj+/P/yRQ9rSNm3ySZs8HzMMzff7TfquTtIn33ySOIwxRgAAAJb0ivQAAAAgthAfAADAKuIDAABYRXwAAACriA8AAGAV8QEAAKwiPgAAgFXEBwAAsCou0gM019TUpCNHjigpKUkOhyPS4wAAgA4wxqi2tlZZWVnq1avtcxvdLj6OHDmi7OzsSI8BAAA64fDhwxowYECbx3S7+EhKSpL0/fButzvC0wAIJZ/Pp5KSEuXl5cnpdEZ6HAAh5PF4lJ2d7f893pZuFx9nn2pxu93EBxBlfD6fEhMT5Xa7iQ8gSnVkyQQLTgEAgFXEBwAAsIr4AAAAVhEfAADAKuIDAABYRXwAAACriA8AAGAV8QEAAKzqdm8yBiA6tfbGQ8aYCEwCINI48wEg7M73jod8eCQQm4gPAGHVXmAQIEDsIT4AhE3zsPB6vdqyZYu8Xm+bxwGIbsQHACuar+9gvQcQu4gPAABgFfEBAACsIj4AWNF8XQfrPIDYxft8AAgbY0xAZMTHx5/3OACxgzMfAMKqvbAgPIDYQ3wACLvzBQbhAcQm4gOAFcaYgPf5IDyA2EV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq/hgOQBWtPYptrzLKRCbOPMBIOxaC4+2tgOIbsQHgLBqLzAIECD2EB8AwqZ5WJz7wXJtHQcguhEfAKxovr6D9R5A7CI+AACAVcQHAACwKqj4WLNmjUaMGCG32y23261x48bprbfe8u+fN2+eHA5HwJ+xY8eGfGgAPU/zdR2s8wBiV1Dv8zFgwACtXLlSl112mSRp3bp1mj59uvbs2aOrrrpKkjR58mQVFRX5rxMfHx/CcQH0JMaYgMg43+MB6z+A2BJUfEybNi3g8rJly7RmzRrt2rXLHx8ul0sZGRmhmxBAj9Y8QFrbDyC2dPodTs+cOaNXXnlFdXV1GjdunH97WVmZ0tLSlJycrNzcXC1btkxpaWnnvZ2GhgY1NDT4L3s8HkmSz+eTz+fr7HgAuhGv19vqWQ+v18v9HIgSwdyXHSbIf3ZUVFRo3LhxOn36tPr27av169drypQpkqSNGzeqb9++ysnJ0f79+/XQQw+psbFR5eXlcrlcrd5eQUGBCgsLW2xfv369EhMTgxkNAABESH19vebMmaOamhq53e42jw06Prxerw4dOqTq6mpt2rRJf/rTn7R9+3YNGzasxbFVVVXKycnRhg0bNGvWrFZvr7UzH9nZ2Tp+/Hi7wwPoWXw+n0pLSzVp0iQ5nc5IjwMghDwej1JTUzsUH0E/7RIfH+9fcDp69Gh9+OGHeuaZZ/T73/++xbGZmZnKycnRF198cd7bc7lcrZ4VcTqdPDgBUYr7NxB9grlPd/l9PowxAWcuznXixAkdPnxYmZmZXf02AAAgSgR15mPp0qXKz89Xdna2amtrtWHDBpWVlam4uFgnT55UQUGBbrvtNmVmZurAgQNaunSpUlNTNXPmzHDNDwAAepig4uPrr7/W7bffrqqqKvXr108jRoxQcXGxJk2apFOnTqmiokIvvviiqqurlZmZqYkTJ2rjxo1KSkoK1/wAAKCHCSo+XnjhhfPuS0hI0Ntvv93lgQAAQHTjs10AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABg
FfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgVVykBwAQGxwOR4ttxpgITAIg0jjzASDsWguPtrYDiG7EB4Cwai8wCBAg9hAfAMKmeVh4vV5t2bJFXq+3zeMARDfiA4AVzdd3sN4DiF3EBwAAsIr4AAAAVvFSWwBWsK4DwFmc+QAQNh1d18H6DyC2EB8AAMAq4gNA2HT0qRaekgFiC/EBwApjTMD7fPBUCxC7iA8AAGAV8QEAAKwiPgBY0XxdB+s8gNjF+3wACBtjTEBkxMfHn/c4ALGDMx8Awqq9sCA8gNhDfAAIu/MFBuEBxCbiA4AVvNQWwFnEBwAAsIoFpwCsaO3VLZz9AGITZz4AhN35XlbLy22B2ER8AAir9gKDAAFiD/EBIGyah8W5C07bOg5AdCM+AFjRfH0H6z2A2EV8AAAAq4gPAABgFS+1BWAF6zoAnMWZDwBh09F1Haz/AGIL8QEAAKwiPgCEzdSpU0N6HIDoQHwACJtt27b5v27rg+XOPQ5A9CM+AACAVcQHAACwivgAEDZTpkzxfz116lTFx8drxowZio+PD1jnce5xAKKfw3Sz17h5PB7169dPNTU1crvdkR4HQBd15P09utnDEIBOCOb3N2c+AACAVcQHgLDZvHmz/+vrrrsuYN+5l889DkD042kXAGFz7lMuxhj5fD5t27ZNU6ZMkdPpbLEfQM/F0y4AupUbbrih1e3Nz4YAiA3EB4Cwe/fdd1vdvnv3bsuTAOgOgoqPNWvWaMSIEXK73XK73Ro3bpzeeust/35jjAoKCpSVlaWEhARNmDBBn332WciHBtAzvPbaa/6vP/jgg4B9514+9zgA0S+oNR9bt27VBRdcoMsuu0yStG7dOj3xxBPas2ePrrrqKj3++ONatmyZ1q5dq8svv1yPPfaYduzYocrKSiUlJXXoe7DmA4guzV9qO3jwYO3bty9gG+s9gJ4vmN/fXV5wmpKSoieeeEJ33HGHsrKytHDhQi1ZskSS1NDQoPT0dD3++OO68847Qz48gJ6hrff6IDyA6GBlwemZM2e0YcMG1dXVady4cdq/f7+OHj2qvLw8/zEul0u5ubnauXNnZ78NgChgjGnx1Mprr71GeAAxKi7YK1RUVGjcuHE6ffq0+vbtq82bN2vYsGH+wEhPTw84Pj09XQcPHjzv7TU0NKihocF/2ePxSJJ8Pp98Pl+w4wHopm655RbV1dWptLRUkyZNktPp5D4ORJFg7s9Bx8cVV1yhjz/+WNXV1dq0aZPmzp2r7du3+/c3P71qjGnzlOuKFStUWFjYYntJSYkSExODHQ9AD1BaWhrpEQCEWH19fYeP7fKajx/+8IcaPHiwlixZosGDB+sf//iHrr32Wv/+6dOnKzk5WevWrWv1+q2d+cjOztbx48dZ8wFEkeeee0733nuv//KTTz6pBQsWRHAiAKHk8XiUmpraoTUfQZ/5aM4Yo4aGBg0aNEgZGRkqLS31x4fX69X27dv1+OOPn/f6LpdLLperxXan0ymn09nV8QB0A62d/bz33nt17733su4DiBLB/M4OKj6WLl2q/Px8ZWdnq7a2Vhs2bFBZWZmKi4vlcDi0cOFCLV++XEOGDNGQIUO0fPlyJSYmas6cOUH/EACiQ/PwSE5OVnV1dcB+AgSILUG92uXrr7/W7bffriuuuEI/+MEPtHv3bhUXF2vSpEmSpPvvv18LFy7UXXfdpdGjR+urr75SSUlJh9/jA0B0eeaZZ/xfb9q0SV6vV2vXrpXX69WmTZtaPQ5A9OOD5QCEDR8sB8QOPlgOQLeSlZXV6vb+/ftbngRAd0B8AAi7I0eOtLr9m2++sTwJgO6A+AAQNqtXr/Z/nZubq/j4eM2YMUPx8fHKzc1t9TgA0Y81HwDCqq03GTyrmz0MAegE1nwAAIBui/gAEDZz5871f33jjTcG7Dv38rnHAYh+PO0CIGx4qS0QO3jaBQAAdFvEBwAA
sIr4ABA2P/3pT/1fOxyOgJfanvuUy7nHAYh+rPkAEFa81BaIDaz5AAAA3RbxASBsOnLWI5jjAEQH4gOAFcYYeb1ebdmyRV6vl6dagBhGfAAAAKviIj0AgNjw+eefa/jw4WpqalKvXr1UUVER6ZEARAjxAcCKq666yv91U1NTwGUAsYWnXQCETUfXdbD+A4gtxAeAsPn8889DehyA6EB8AAib4cOHS5KcTmer+89uP3scgNhAfAAIm6amJknSAw880OpLbRctWhRwHIDYQHwACJtevb5/iFm5cmWr+59++umA4wDEBu7xAMLm7MtpfT6fHnzwwYAPlnvwwQfl8/kCjgMQG/hgOQBhxQfLAbGBD5YDAADdFvEBIGyee+45/9erVq3yr+3o1auXVq1a1epxAKIfT7sACJtzn3Ixxsjn82nbtm2aMmWKnE5ni/0Aei6edgHQrQwYMKDV7enp6ZYnAdAdEB8Awu7f//53q9u//vpry5MA6A6IDwBh8+yzz/q/3rJlS8C+cy+fexyA6MeaDwBh1fyltm63Wx6PJ2BbN3sYAtAJrPkA0G00DwvCAwDxASDsjDEtnlp59tlnCQ8gRvG0C4B21dfXa+/evV2+nZOnGvTm397X1Inj1DfBFYLJpKFDhyoxMTEktwWg84L5/R1naSYAPdjevXs1atSokN3eqvYP6bDy8nKNHDkyhLcIINyIDwDtGjp0qMrLy7t8O5VV1Vr8SoWemj1cV2Qmd30wfT8bgJ6F+ADQrsTExJCcXeh18IRc757SlVdfo//IuSgEkwHoiVhwCgAArCI+AACAVcQHAACwivgAAABWER8AAMAq4gMAAFhFfAAAAKuIDwAAYBXxAQAArCI+AACAVcQHAACwivgAAABWER8AAMAq4gMAAFhFfAAAAKuIDwAAYBXxAQAArCI+AACAVcQHAACwivgAAABWBRUfK1as0JgxY5SUlKS0tDTNmDFDlZWVAcfMmzdPDocj4M/YsWNDOjQAAOi5goqP7du3a/78+dq1a5dKS0vV2NiovLw81dXVBRw3efJkVVVV+f9s27YtpEMDAICeKy6Yg4uLiwMuFxUVKS0tTeXl5brxxhv9210ulzIyMkIzIQAAiCpBxUdzNTU1kqSUlJSA7WVlZUpLS1NycrJyc3O1bNkypaWltXobDQ0Namho8F/2eDySJJ/PJ5/P15XxAHQzjY2N/r+5fwPRJZj7dKfjwxijxYsXa/z48br66qv92/Pz8zV79mzl5ORo//79euihh3TTTTepvLxcLperxe2sWLFChYWFLbaXlJQoMTGxs+MB6IYOn5SkOO3atUtffRrpaQCEUn19fYePdRhjTGe+yfz58/Xmm2/qvffe04ABA857XFVVlXJycrRhwwbNmjWrxf7WznxkZ2fr+PHjcrvdnRkNQDf1v4e+1Y/++JFe/e/RuuaSlPavAKDH8Hg8Sk1NVU1NTbu/vzt15mPBggV6/fXXtWPHjjbDQ5IyMzOVk5OjL774otX9Lper1TMiTqdTTqezM+MB6Kbi4uL8f3P/BqJLMPfpoOLDGKMFCxZo8+bNKisr06BBg9q9zokTJ3T48GFlZmYG860AAECUCuqltvPnz9fLL7+s9evXKykpSUePHtXRo0d16tQpSdLJkyd133336f3339eBAwdUVlamadOmKTU1VTNnzgzLDwAAAHqWoM58rFmzRpI0YcKEgO1FRUWaN2+eLrjgAlVUVOjFF19UdXW1MjMzNXHiRG3cuFFJSUkhGxoAAPRcQT/t0paEhAS9/fbbXRoIAABENz7bBQAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABg
FfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwKqg4mPFihUaM2aMkpKSlJaWphkzZqiysjLgGGOMCgoKlJWVpYSEBE2YMEGfffZZSIcGAAA9V1DxsX37ds2fP1+7du1SaWmpGhsblZeXp7q6Ov8xq1at0lNPPaXnn39eH374oTIyMjRp0iTV1taGfHgAANDzxAVzcHFxccDloqIipaWlqby8XDfeeKOMMVq9erUefPBBzZo1S5K0bt06paena/369brzzjtDNzkAAOiRgoqP5mpqaiRJKSkpkqT9+/fr6NGjysvL8x/jcrmUm5urnTt3thofDQ0Namho8F/2eDySJJ/PJ5/P15XxgJh34ESd6hrORHoMv38drQn4uzvp47pAAy/qE+kxgB4rmN/ZnY4PY4wWL16s8ePH6+qrr5YkHT16VJKUnp4ecGx6eroOHjzY6u2sWLFChYWFLbaXlJQoMTGxs+MBMe/YKWnZx13690XY3L/5n5EeoVUP/kej0hIiPQXQM9XX13f42E4/Mt1999365JNP9N5777XY53A4Ai4bY1psO+uXv/ylFi9e7L/s8XiUnZ2tvLw8ud3uzo4HxLzPjnikj3fpNz8arsv6d49/0dedblDxux9q8g1j1Ke3K9Lj+H35TZ3ue7VCY8aN11VZPO4AnXH2mYuO6FR8LFiwQK+//rp27NihAQMG+LdnZGRI+v4MSGZmpn/7sWPHWpwNOcvlcsnlavkg5HQ65XQ6OzMeAElxcd/fvYdm9tPVF/eL8DTf8/l8Or5X+s9L+3er+/fZ/1ZxcXHdai6gJwnmvhPUq12MMbr77rv12muv6a9//asGDRoUsH/QoEHKyMhQaWmpf5vX69X27dv1X//1X8F8KwAAEKWCOvMxf/58rV+/Xn/+85+VlJTkX+PRr18/JSQkyOFwaOHChVq+fLmGDBmiIUOGaPny5UpMTNScOXPC8gMAAICeJaj4WLNmjSRpwoQJAduLioo0b948SdL999+vU6dO6a677tJ3332n6667TiUlJUpKSgrJwAAAoGcLKj6MMe0e43A4VFBQoIKCgs7OBAAAohif7QIAAKwiPgAAgFXEBwAAsIr4AAAAVhEfAADAKuIDAABYRXwAAACriA8AAGAV8QEAAKwiPgAAgFXEBwAAsIr4AAAAVhEfAADAKuIDAABYRXwAAACriA8AAGAV8QEAAKwiPgAAgFXEBwAAsCou0gMACB9HnEf7PZXq1btvpEeRJDU2NupI4xH989t/Ki6u+zz87PeclCPOE+kxgJjRfe79AELOmbxbSz9YHukxWvht8W8jPUILzuQfSJoS6TGAmEB8AFHMV32dnpw6R4PTus+Zj7+/93ddP/76bnXmY9+xk7rnf/ZFegwgZnSfez+AkDONbg1yX6FhF/WL9CiSJJ/Pp/1x+3VlypVyOp2RHsev6XSNTOM3kR4DiBksOAUAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYRHwAAwCriAwAAWEV8AAAAq4gPAABgFfEBAACsIj4AAIBVxAcAALCK+AAAAFYFHR87duzQtGnTlJWVJYfDoS1btgTsnzdvnhwOR8CfsWPHhmpeAADQw8UFe4W6ujpdc801+tnPfqbbbrut1WMmT56soqIi/+X4+PjOTwigU075zkiSPv2q
JsKT/J+6Uw366Bsp4+B36pPgivQ4fl8eOxnpEYCYEnR85OfnKz8/v81jXC6XMjIyOj0UgK7b9/9/oT7wWkWEJ2kuTi99+WGkh2hVH1fQD4kAOiEs97SysjKlpaUpOTlZubm5WrZsmdLS0lo9tqGhQQ0NDf7LHo9HkuTz+eTz+cIxHhATJl5+kZZNH6ZL+/dRgvOCSI8jSfrX0Rrdv/mfWjXzSl2e0S/S4wTo47pAA/rF87gDdFIw952Qx0d+fr5mz56tnJwc7d+/Xw899JBuuukmlZeXy+VqeZp1xYoVKiwsbLG9pKREiYmJoR4PiCl9JR07Fukp/s/3J2PidOzLCrmORnqalj6P9ABAD1ZfX9/hYx3GGNPZb+RwOLR582bNmDHjvMdUVVUpJydHGzZs0KxZs1rsb+3MR3Z2to4fPy63293Z0QB0Q/976Fv96I8f6dX/Hq1rLkmJ9DgAQsjj8Sg1NVU1NTXt/v4O+xOcmZmZysnJ0RdffNHqfpfL1eoZEafTKafTGe7xAFgUFxfn/5v7NxBdgrlPh/19Pk6cOKHDhw8rMzMz3N8KAAD0AEGf+Th58qS+/PJL/+X9+/fr448/VkpKilJSUlRQUKDbbrtNmZmZOnDggJYuXarU1FTNnDkzpIMDAICeKej4+OijjzRx4kT/5cWLF0uS5s6dqzVr1qiiokIvvviiqqurlZmZqYkTJ2rjxo1KSkoK3dQAAKDHCjo+JkyYoLbWqL799ttdGggAAEQ3PtsFAABYRXwAAACriA8AAGAV8QEAAKwiPgAAgFXEBwAAsIr4AAAAVhEfAADAKuIDAABYRXwAAACriA8AAGAV8QEAAKwiPgAAgFXEBwAAsIr4AAAAVhEfAADAKuIDAABYRXwAAACriA8AAGAV8QEAAKwiPgAAgFXEBwAAsIr4AAAAVhEfAADAKuIDAABYRXwAAACriA8AAGAV8QEAAKwiPgAAgFXEBwAAsIr4AAAAVhEfAADAqrhIDwCg+6uvr9fevXu7fDuVVdVqOPql/vlpgppOJHd9MElDhw5VYmJiSG4LgB3EB4B27d27V6NGjQrZ7c1ZF7KbUnl5uUaOHBm6GwQQdsQHgHYNHTpU5eXlXb6dk6ca9Obf3tfUiePUN8EVgsm+nw1Az0J8AGhXYmJiSM4u+Hw+fXf8mMb952g5nc4QTAagJ2LBKQAAsIr4AAAAVhEfAADAKuIDAABYRXwAAACriA8AAGAV8QEAAKwiPgAAgFXEBwAAsIr4AAAAVhEfAADAKuIDAABYRXwAAACrut2n2hpjJEkejyfCkwAINZ/Pp/r6enk8Hj7VFogyZ39vn/093pZuFx+1tbWSpOzs7AhPAgAAglVbW6t+/fq1eYzDdCRRLGpqatKRI0eUlJQkh8MR6XEAhJDH41F2drYOHz4st9sd6XEAhJAxRrW1tcrKylKvXm2v6uh28QEgenk8HvXr1081NTXEBxDDWHAKAACsIj4AAIBVxAcAa1wul37961/L5XJFehQAEcSaDwAAYBVnPgAAgFXEBwAAsIr4AAAAVhEfAADAKuID6OHmzZunGTNmdPh4h8OhLVu2hG2eYAwcOFCrV6/u8PFlZWVyOBz+PxdddJFuuukm/f3vfw/q+569nerq6uAGBhASxAeATvH5fBH73pWVlaqqqlJZWZn69++vqVOn6tixYxGbB0BwiA8gikyYMEH33HOP7r//fqWkpCgjI0MFBQX+/QMHDpQkzZw5Uw6Hw39ZkrZu3apRo0apd+/euvTSS1VYWKjGxkb/fofDod/97neaPn26+vTpo8cee6xD1ysoKNAll1wil8ulrKws3XPPPf5ZDx48qEWLFvnPZHRUWlqaMjIyNHz4cP3qV79STU2Ndu/e7d//8ssva/To0UpKSlJGRobmzJnjj5MDBw5o4sSJkqQLL7xQDodD8+bNk/T9Z1OsWrVKl156qRISEnTNNdfo1Vdf7fBcADrIAOjR5s6da6ZPn26MMSY3
N9e43W5TUFBg/vWvf5l169YZh8NhSkpKjDHGHDt2zEgyRUVFpqqqyhw7dswYY0xxcbFxu91m7dq1Zt++faakpMQMHDjQFBQU+L+PJJOWlmZeeOEFs2/fPnPgwIF2r/fKK68Yt9tttm3bZg4ePGh2795t/vCHPxhjjDlx4oQZMGCAeeSRR0xVVZWpqqpq92f929/+ZiSZ7777zhhjTF1dnVm0aJGRZN566y3/cS+88ILZtm2b2bdvn3n//ffN2LFjTX5+vjHGmMbGRrNp0yYjyVRWVpqqqipTXV1tjDFm6dKlZujQoaa4uNjs27fPFBUVGZfLZcrKyrrwfwhAc8QH0MM1j4/x48cH7B8zZoxZsmSJ/7Iks3nz5oBjbrjhBrN8+fKAbS+99JLJzMwMuN7ChQuDut6TTz5pLr/8cuP1eludPScnxzz99NPt/oxnnY2PPn36mD59+hiHw2EkmVGjRp33exhjzAcffGAkmdra2oDbORsxxhhz8uRJ07t3b7Nz586A6/785z83P/nJTzo8I4D2xUXohAuAMBkxYkTA5czMzHbXQ5SXl+vDDz/UsmXL/NvOnDmj06dPq76+XomJiZKk0aNHB3W92bNna/Xq1br00ks1efJkTZkyRdOmTVNcXNceet5991316dNHe/bs0ZIlS7R27Vo5nU7//j179qigoEAff/yxvv32WzU1NUmSDh06pGHDhrV6m59//rlOnz6tSZMmBWz3er269tpruzQvgEDEBxBlzv0lLH2/VuPsL9/zaWpqUmFhoWbNmtViX+/evf1f9+nTJ6jrZWdnq7KyUqWlpfrLX/6iu+66S0888YS2b9/eYs5gDBo0SMnJybr88st1+vRpzZw5U59++qlcLpfq6uqUl5envLw8vfzyy+rfv78OHTqkm2++WV6vt83/BpL05ptv6uKLLw7Yx2fRAKFFfAAxxul06syZMwHbRo4cqcrKSl122WVB3VZHrpeQkKBbb71Vt956q+bPn6+hQ4eqoqJCI0eOVHx8fItZgnX77bfrkUce0W9/+1stWrRIe/fu1fHjx7Vy5UplZ2dLkj766KOA68THx0tSwPceNmyYXC6XDh06pNzc3C7NBKBtxAcQYwYOHKh33nlH119/vVwuly688EI9/PDDuuWWW5Sdna3Zs2erV69e+uSTT1RRUeF/VUtr2rve2rVrdebMGV133XVKTEzUSy+9pISEBOXk5Phn2bFjh3784x/L5XIpNTU16J+nV69eWrhwoR577DHdeeeduuSSSxQfH6/nnntOv/jFL/Tpp5/q0UcfDbhOTk6OHA6H3njjDU2ZMkUJCQlKSkrSfffdp0WLFqmpqUnjx4+Xx+PRzp071bdvX82dOzfo2QC0jpfaAjHmySefVGlpqbKzs/1rGW6++Wa98cYbKi0t1ZgxYzR27Fg99dRT/kg4n/aul5ycrD/+8Y+6/vrrNWLECL3zzjvaunWrLrroIknSI488ogMHDmjw4MHq379/p3+mO+64Qz6fT88//7z69++vtWvX6pVXXtGwYcO0cuVK/eY3vwk4/uKLL1ZhYaEeeOABpaen6+6775YkPfroo3r44Ye1YsUKXXnllbr55pu1detWDRo0qNOzAWjJYYwxkR4CAADEDs58AAAAq4gPAN1Gfn6++vbt2+qf5cuXR3o8ACHC0y4Auo2vvvpKp06danVfSkqKUlJSLE8EIByIDwAAYBVPuwAAAKuIDwAAYBXxAQAArCI+AACAVcQHAACwivgAAABWER8AAMAq4gMAAFj1/wDVZox9GLFAxwAAAABJRU5ErkJggg=="
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Box plot of the Interest_Rate column.\n",
    "data.boxplot(return_type='axes', column=['Interest_Rate'])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:33:48.107215Z",
     "start_time": "2025-07-16T06:33:47.987085Z"
    }
   },
   "id": "1182f9bb558c5741",
   "execution_count": 31
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   Interest_Rate  Interest_Rate_Missing\n0            NaN                      1\n1          13.25                      0\n2            NaN                      1\n3            NaN                      1\n4            NaN                      1\n5          13.99                      0\n6            NaN                      1\n7            NaN                      1\n8          14.85                      0\n9          18.25                      0",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Interest_Rate</th>\n      <th>Interest_Rate_Missing</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>13.25</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>13.99</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>NaN</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>8</th>\n      <td>14.85</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>9</th>\n      <td>18.25</td>\n      <td>0</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Add a 0/1 indicator column: 1 where Interest_Rate is missing, 0 where it is present.\n",
    "# isnull() is vectorized; astype('int64') keeps the same integer dtype the lambda produced.\n",
    "data['Interest_Rate_Missing'] = data['Interest_Rate'].isnull().astype('int64')\n",
    "data[['Interest_Rate', 'Interest_Rate_Missing']].head(10)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:34:12.374339Z",
     "start_time": "2025-07-16T06:34:12.296249Z"
    }
   },
   "id": "6dd17f0edd4cbc8f",
   "execution_count": 33
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# Fill missing values with the column median.\n",
    "# Results are assigned back rather than using fillna(inplace=True) on a selected column,\n",
    "# which is chained assignment: deprecated in pandas 2.2 and a no-op under Copy-on-Write.\n",
    "data['Loan_Amount_Applied'] = data['Loan_Amount_Applied'].fillna(data['Loan_Amount_Applied'].median())\n",
    "data['Loan_Tenure_Applied'] = data['Loan_Tenure_Applied'].fillna(data['Loan_Tenure_Applied'].median())"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:35:41.900461Z",
     "start_time": "2025-07-16T06:35:41.890929Z"
    }
   },
   "id": "98f89c881be2480a",
   "execution_count": 34
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "            ID  Gender  Monthly_Income        DOB Lead_Creation_Date  \\\n0  ID000002C20  Female           20000  23-May-78          15-May-15   \n1  ID000004E40    Male           35000  07-Oct-85          04-May-15   \n2  ID000007H20    Male           22500  10-Oct-81          19-May-15   \n3  ID000008I30    Male           35000  30-Nov-87          09-May-15   \n4  ID000009J40    Male          100000  17-Feb-84          20-May-15   \n\n   Loan_Amount_Applied  Loan_Tenure_Applied Existing_EMI  \\\n0             300000.0                  5.0          0.0   \n1             200000.0                  2.0          0.0   \n2             600000.0                  4.0          0.0   \n3            1000000.0                  5.0          0.0   \n4             500000.0                  2.0      25000.0   \n\n                         Employer_Name       Salary_Account  ...  Device_Type  \\\n0                              CYBOSOL            HDFC Bank  ...  Web-browser   \n1  TATA CONSULTANCY SERVICES LTD (TCS)           ICICI Bank  ...  Web-browser   \n2              ALCHEMIST HOSPITALS LTD  State Bank of India  ...  Web-browser   \n3                     BIHAR GOVERNMENT  State Bank of India  ...  Web-browser   \n4                 GLOBAL EDGE SOFTWARE            HDFC Bank  ...  
Web-browser   \n\n  Var2 Source  Var4  LoggedIn  Disbursed  source  Age  \\\n0    G   S122     1       0.0        0.0   train   37   \n1    G   S122     3       0.0        0.0   train   30   \n2    B   S143     1       0.0        0.0   train   34   \n3    B   S143     3       0.0        0.0   train   28   \n4    B   S134     3       1.0        0.0   train   31   \n\n  EMI_Loan_Submitted_Missing Interest_Rate_Missing  \n0                          1                     1  \n1                          0                     0  \n2                          1                     1  \n3                          1                     1  \n4                          1                     1  \n\n[5 rows x 29 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>ID</th>\n      <th>Gender</th>\n      <th>Monthly_Income</th>\n      <th>DOB</th>\n      <th>Lead_Creation_Date</th>\n      <th>Loan_Amount_Applied</th>\n      <th>Loan_Tenure_Applied</th>\n      <th>Existing_EMI</th>\n      <th>Employer_Name</th>\n      <th>Salary_Account</th>\n      <th>...</th>\n      <th>Device_Type</th>\n      <th>Var2</th>\n      <th>Source</th>\n      <th>Var4</th>\n      <th>LoggedIn</th>\n      <th>Disbursed</th>\n      <th>source</th>\n      <th>Age</th>\n      <th>EMI_Loan_Submitted_Missing</th>\n      <th>Interest_Rate_Missing</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>ID000002C20</td>\n      <td>Female</td>\n      <td>20000</td>\n      <td>23-May-78</td>\n      <td>15-May-15</td>\n      <td>300000.0</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>CYBOSOL</td>\n      <td>HDFC Bank</td>\n      <td>...</td>\n      <td>Web-browser</td>\n      <td>G</td>\n      <td>S122</td>\n      <td>1</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>train</td>\n      <td>37</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>ID000004E40</td>\n      <td>Male</td>\n      <td>35000</td>\n      <td>07-Oct-85</td>\n      <td>04-May-15</td>\n      <td>200000.0</td>\n      <td>2.0</td>\n      <td>0.0</td>\n      <td>TATA CONSULTANCY SERVICES LTD (TCS)</td>\n      <td>ICICI Bank</td>\n      <td>...</td>\n      <td>Web-browser</td>\n      <td>G</td>\n      <td>S122</td>\n      <td>3</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>train</td>\n      <td>30</td>\n      <td>0</td>\n      
<td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>ID000007H20</td>\n      <td>Male</td>\n      <td>22500</td>\n      <td>10-Oct-81</td>\n      <td>19-May-15</td>\n      <td>600000.0</td>\n      <td>4.0</td>\n      <td>0.0</td>\n      <td>ALCHEMIST HOSPITALS LTD</td>\n      <td>State Bank of India</td>\n      <td>...</td>\n      <td>Web-browser</td>\n      <td>B</td>\n      <td>S143</td>\n      <td>1</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>train</td>\n      <td>34</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>ID000008I30</td>\n      <td>Male</td>\n      <td>35000</td>\n      <td>30-Nov-87</td>\n      <td>09-May-15</td>\n      <td>1000000.0</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>BIHAR GOVERNMENT</td>\n      <td>State Bank of India</td>\n      <td>...</td>\n      <td>Web-browser</td>\n      <td>B</td>\n      <td>S143</td>\n      <td>3</td>\n      <td>0.0</td>\n      <td>0.0</td>\n      <td>train</td>\n      <td>28</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>ID000009J40</td>\n      <td>Male</td>\n      <td>100000</td>\n      <td>17-Feb-84</td>\n      <td>20-May-15</td>\n      <td>500000.0</td>\n      <td>2.0</td>\n      <td>25000.0</td>\n      <td>GLOBAL EDGE SOFTWARE</td>\n      <td>HDFC Bank</td>\n      <td>...</td>\n      <td>Web-browser</td>\n      <td>B</td>\n      <td>S134</td>\n      <td>3</td>\n      <td>1.0</td>\n      <td>0.0</td>\n      <td>train</td>\n      <td>31</td>\n      <td>1</td>\n      <td>1</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 29 columns</p>\n</div>"
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:35:55.797021Z",
     "start_time": "2025-07-16T06:35:55.776972Z"
    }
   },
   "id": "cd24a969b9236c4f",
   "execution_count": 35
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [
    "# 缺省值太多。。。是否缺省。。。\n",
    "data['Loan_Amount_Submitted_Missing'] = data['Loan_Amount_Submitted'].apply(lambda x: 1 if pd.isnull(x) else 0)\n",
    "data['Loan_Tenure_Submitted_Missing'] = data['Loan_Tenure_Submitted'].apply(lambda x: 1 if pd.isnull(x) else 0)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:36:26.916415Z",
     "start_time": "2025-07-16T06:36:26.773060Z"
    }
   },
   "id": "4c267db2fa3ede58",
   "execution_count": 36
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "Source\nS122      55248\nS133      42900\nothers    26589\nName: count, dtype: int64"
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data['Source'] = data['Source'].apply(lambda x: 'others' if x not in ['S122','S133'] else x)\n",
    "data['Source'].value_counts()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:37:11.788561Z",
     "start_time": "2025-07-16T06:37:11.760657Z"
    }
   },
   "id": "e11377e9af9306aa",
   "execution_count": 37
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "ID                                   0\nGender                               0\nMonthly_Income                       0\nDOB                                  0\nLead_Creation_Date                   0\nLoan_Amount_Applied                  0\nLoan_Tenure_Applied                  0\nExisting_EMI                         0\nEmployer_Name                      113\nSalary_Account                   16801\nMobile_Verified                      0\nVar5                                 0\nVar1                                 1\nLoan_Amount_Submitted            49535\nLoan_Tenure_Submitted            49535\nInterest_Rate                    84901\nProcessing_Fee                   85346\nEMI_Loan_Submitted               84901\nFilled_Form                          0\nDevice_Type                          0\nVar2                                 0\nSource                               0\nVar4                                 0\nLoggedIn                         37717\nDisbursed                        37718\nsource                               0\nAge                                  0\nEMI_Loan_Submitted_Missing           0\nInterest_Rate_Missing                0\nLoan_Amount_Submitted_Missing        0\nLoan_Tenure_Submitted_Missing        0\ndtype: int64"
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.apply(lambda x: sum(x.isnull()))"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:37:43.928709Z",
     "start_time": "2025-07-16T06:37:43.631503Z"
    }
   },
   "id": "827c10307021546f",
   "execution_count": 38
  },
  {
   "cell_type": "markdown",
   "source": [
    "# 数值编码"
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "bbba21736969b11a"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "   Gender  Mobile_Verified  Var1  Filled_Form  Device_Type  Source\n0       0                1    13            1            2       0\n1       1                2     8            1            2       0\n2       1                2    13            1            2       2\n3       1                2    13            1            2       2\n4       1                2    13            1            2       2",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Gender</th>\n      <th>Mobile_Verified</th>\n      <th>Var1</th>\n      <th>Filled_Form</th>\n      <th>Device_Type</th>\n      <th>Source</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>0</td>\n      <td>1</td>\n      <td>13</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>1</td>\n      <td>2</td>\n      <td>8</td>\n      <td>1</td>\n      <td>2</td>\n      <td>0</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>1</td>\n      <td>2</td>\n      <td>13</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>1</td>\n      <td>2</td>\n      <td>13</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>1</td>\n      <td>2</td>\n      <td>13</td>\n      <td>1</td>\n      <td>2</td>\n      <td>2</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 40,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "le = LabelEncoder()\n",
    "var_to_encode = ['Gender', 'Mobile_Verified', 'Var1','Filled_Form','Device_Type','Source']\n",
    "for col in var_to_encode:\n",
    "    data[col] = le.fit_transform(data[col])\n",
    "data[var_to_encode].head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:41:13.844797Z",
     "start_time": "2025-07-16T06:41:13.740231Z"
    }
   },
   "id": "33f90b453c134cc8",
   "execution_count": 40
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 进行One-Hot编码  "
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "f810fbad4f8e3182"
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "Index(['ID', 'Monthly_Income', 'DOB', 'Lead_Creation_Date',\n       'Loan_Amount_Applied', 'Loan_Tenure_Applied', 'Existing_EMI',\n       'Employer_Name', 'Salary_Account', 'Var5', 'Loan_Amount_Submitted',\n       'Loan_Tenure_Submitted', 'Interest_Rate', 'Processing_Fee',\n       'EMI_Loan_Submitted', 'Var2', 'Var4', 'LoggedIn', 'Disbursed', 'source',\n       'Age', 'EMI_Loan_Submitted_Missing', 'Interest_Rate_Missing',\n       'Loan_Amount_Submitted_Missing', 'Loan_Tenure_Submitted_Missing',\n       'Gender_0', 'Gender_1', 'Mobile_Verified_0', 'Mobile_Verified_1',\n       'Mobile_Verified_2', 'Var1_0', 'Var1_1', 'Var1_2', 'Var1_3', 'Var1_4',\n       'Var1_5', 'Var1_6', 'Var1_7', 'Var1_8', 'Var1_9', 'Var1_10', 'Var1_11',\n       'Var1_12', 'Var1_13', 'Var1_14', 'Var1_15', 'Var1_16', 'Var1_17',\n       'Var1_18', 'Var1_19', 'Filled_Form_0', 'Filled_Form_1', 'Filled_Form_2',\n       'Device_Type_0', 'Device_Type_1', 'Device_Type_2', 'Source_0',\n       'Source_1', 'Source_2'],\n      dtype='object')"
     },
     "execution_count": 42,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = pd.get_dummies(data,columns=var_to_encode)\n",
    "data.columns"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:43:21.448938Z",
     "start_time": "2025-07-16T06:43:21.342529Z"
    }
   },
   "id": "b0139905b3f790d6",
   "execution_count": 42
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "data": {
      "text/plain": "            ID  Monthly_Income        DOB Lead_Creation_Date  \\\n0  ID000002C20           20000  23-May-78          15-May-15   \n1  ID000004E40           35000  07-Oct-85          04-May-15   \n2  ID000007H20           22500  10-Oct-81          19-May-15   \n3  ID000008I30           35000  30-Nov-87          09-May-15   \n4  ID000009J40          100000  17-Feb-84          20-May-15   \n\n   Loan_Amount_Applied  Loan_Tenure_Applied Existing_EMI  \\\n0             300000.0                  5.0          0.0   \n1             200000.0                  2.0          0.0   \n2             600000.0                  4.0          0.0   \n3            1000000.0                  5.0          0.0   \n4             500000.0                  2.0      25000.0   \n\n                         Employer_Name       Salary_Account Var5  ...  \\\n0                              CYBOSOL            HDFC Bank    0  ...   \n1  TATA CONSULTANCY SERVICES LTD (TCS)           ICICI Bank   13  ...   \n2              ALCHEMIST HOSPITALS LTD  State Bank of India    0  ...   \n3                     BIHAR GOVERNMENT  State Bank of India   10  ...   \n4                 GLOBAL EDGE SOFTWARE            HDFC Bank   17  ...   
\n\n   Var1_19  Filled_Form_0  Filled_Form_1  Filled_Form_2  Device_Type_0  \\\n0    False          False           True          False          False   \n1    False          False           True          False          False   \n2    False          False           True          False          False   \n3    False          False           True          False          False   \n4    False          False           True          False          False   \n\n  Device_Type_1  Device_Type_2  Source_0  Source_1 Source_2  \n0         False           True      True     False    False  \n1         False           True      True     False    False  \n2         False           True     False     False     True  \n3         False           True     False     False     True  \n4         False           True     False     False     True  \n\n[5 rows x 59 columns]",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>ID</th>\n      <th>Monthly_Income</th>\n      <th>DOB</th>\n      <th>Lead_Creation_Date</th>\n      <th>Loan_Amount_Applied</th>\n      <th>Loan_Tenure_Applied</th>\n      <th>Existing_EMI</th>\n      <th>Employer_Name</th>\n      <th>Salary_Account</th>\n      <th>Var5</th>\n      <th>...</th>\n      <th>Var1_19</th>\n      <th>Filled_Form_0</th>\n      <th>Filled_Form_1</th>\n      <th>Filled_Form_2</th>\n      <th>Device_Type_0</th>\n      <th>Device_Type_1</th>\n      <th>Device_Type_2</th>\n      <th>Source_0</th>\n      <th>Source_1</th>\n      <th>Source_2</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>ID000002C20</td>\n      <td>20000</td>\n      <td>23-May-78</td>\n      <td>15-May-15</td>\n      <td>300000.0</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>CYBOSOL</td>\n      <td>HDFC Bank</td>\n      <td>0</td>\n      <td>...</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>ID000004E40</td>\n      <td>35000</td>\n      <td>07-Oct-85</td>\n      <td>04-May-15</td>\n      <td>200000.0</td>\n      <td>2.0</td>\n      <td>0.0</td>\n      <td>TATA CONSULTANCY SERVICES LTD (TCS)</td>\n      <td>ICICI Bank</td>\n      <td>13</td>\n      <td>...</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>True</td>\n    
  <td>False</td>\n      <td>False</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>ID000007H20</td>\n      <td>22500</td>\n      <td>10-Oct-81</td>\n      <td>19-May-15</td>\n      <td>600000.0</td>\n      <td>4.0</td>\n      <td>0.0</td>\n      <td>ALCHEMIST HOSPITALS LTD</td>\n      <td>State Bank of India</td>\n      <td>0</td>\n      <td>...</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>ID000008I30</td>\n      <td>35000</td>\n      <td>30-Nov-87</td>\n      <td>09-May-15</td>\n      <td>1000000.0</td>\n      <td>5.0</td>\n      <td>0.0</td>\n      <td>BIHAR GOVERNMENT</td>\n      <td>State Bank of India</td>\n      <td>10</td>\n      <td>...</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>ID000009J40</td>\n      <td>100000</td>\n      <td>17-Feb-84</td>\n      <td>20-May-15</td>\n      <td>500000.0</td>\n      <td>2.0</td>\n      <td>25000.0</td>\n      <td>GLOBAL EDGE SOFTWARE</td>\n      <td>HDFC Bank</td>\n      <td>17</td>\n      <td>...</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n      <td>False</td>\n      <td>False</td>\n      <td>True</td>\n    </tr>\n  </tbody>\n</table>\n<p>5 rows × 59 columns</p>\n</div>"
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:43:47.498967Z",
     "start_time": "2025-07-16T06:43:47.483270Z"
    }
   },
   "id": "db83b8de54d7c7c1",
   "execution_count": 43
  },
  {
   "cell_type": "code",
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_29220\\2955080101.py:3: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  train.drop('source',axis=1,inplace=True)\n",
      "C:\\Users\\Administrator\\AppData\\Local\\Temp\\ipykernel_29220\\2955080101.py:4: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
      "  test.drop(['source','Disbursed'],axis=1,inplace=True)\n"
     ]
    }
   ],
   "source": [
    "train = data.loc[data['source']=='train']\n",
    "test = data.loc[data['source']=='test']\n",
    "train.drop('source',axis=1,inplace=True)\n",
    "test.drop(['source','Disbursed'],axis=1,inplace=True)\n",
    "train.to_csv('data/train_modified.csv',index=False)\n",
    "test.to_csv('data/test_modified.csv',index=False)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2025-07-16T06:46:14.196909Z",
     "start_time": "2025-07-16T06:46:12.434858Z"
    }
   },
   "id": "ef7e37d6f536f2f",
   "execution_count": 44
  },
  {
   "cell_type": "code",
   "outputs": [],
   "source": [],
   "metadata": {
    "collapsed": false
   },
   "id": "faac30cd263b0988"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:base] *",
   "language": "python",
   "name": "conda-base-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
